/*
 * Decompiled with CFR 0.152.
 */
package com.splunk.df.search.compute.spark;

import com.splunk.commons.ast.nodes.CommandNode;
import com.splunk.commons.ast.nodes.CommandType;
import com.splunk.commons.ast.nodes.IOrdering;
import com.splunk.commons.ast.nodes.commands.DedupCommand;
import com.splunk.commons.ast.nodes.commands.FieldProperties;
import com.splunk.commons.ast.nodes.commands.JoinCommand;
import com.splunk.commons.ast.nodes.commands.RdInCommand;
import com.splunk.commons.ast.nodes.commands.SortCommand;
import com.splunk.commons.ast.nodes.commands.UnionCommand;
import com.splunk.commons.ast.nodes.expressions.FieldNode;
import com.splunk.commons.ast.nodes.expressions.FieldType;
import com.splunk.commons.ast.nodes.expressions.JoinNode;
import com.splunk.commons.ast.nodes.expressions.JoinType;
import com.splunk.commons.ast.nodes.expressions.SortNode;
import com.splunk.commons.ast.nodes.expressions.SortOrder;
import com.splunk.df.search.compute.Actor;
import com.splunk.df.search.compute.ComputeEngine;
import com.splunk.df.search.compute.ComputeEngineConstants;
import com.splunk.df.search.compute.ComputeEngineContext;
import com.splunk.df.search.compute.DistributedDataset;
import com.splunk.df.search.compute.ExecutionHints;
import com.splunk.df.search.compute.Exploder;
import com.splunk.df.search.compute.Filter;
import com.splunk.df.search.compute.MapPartitioner;
import com.splunk.df.search.compute.Mapper;
import com.splunk.df.search.compute.Reducer;
import com.splunk.df.search.compute.Reporter;
import com.splunk.df.search.compute.SearchResult;
import com.splunk.df.search.compute.SearchResultComparator;
import com.splunk.df.search.compute.SearchResultFactory;
import com.splunk.df.search.compute.SplunkConfExtractor;
import com.splunk.df.search.compute.objects._Long;
import com.splunk.df.search.compute.sdk.Pair;
import com.splunk.df.search.compute.transformers.FieldExtractor;
import com.splunk.df.util.PrimeUtils;
import com.splunk.df.util.Utils;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Random;
import org.apache.log4j.Logger;
import org.apache.spark.HashPartitioner;
import org.apache.spark.Partitioner;
import org.apache.spark.api.java.JavaFutureAction;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.Optional;
import org.apache.spark.api.java.function.FlatMapFunction;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.api.java.function.Function2;
import org.apache.spark.api.java.function.PairFlatMapFunction;
import org.apache.spark.api.java.function.PairFunction;
import org.apache.spark.api.java.function.VoidFunction;
import org.apache.spark.broadcast.Broadcast;
import org.apache.spark.storage.StorageLevel;
import scala.Tuple2;
import scala.Tuple4;

public class SparkDistributedDataset
implements DistributedDataset,
ComputeEngineConstants {
    /** log4j logger shared by all instances and by the static helpers of this class. */
    static final Logger logger = Logger.getLogger(SparkDistributedDataset.class);
    /** Backing Spark RDD holding the search results of this dataset. */
    private final JavaRDD<SearchResult> srRdd;
    /** Spark context of this dataset; the context-only constructors use it to create the empty backing RDD. */
    private final JavaSparkContext jsc;
    /** Extraction hint carried by this dataset; map/explode results take the hint reported by the mapper/exploder. */
    private final FieldExtractor.ExtractionHint fieldExtractionHint;
    /** Reduce-by fields of the reduction that produced this dataset (forwarded unchanged by other operations); empty for the context-only constructors. */
    private final HashSet<SearchResult.FieldMeta> reducedByFields;
    /** Defaults to -1 and is forwarded unchanged to derived datasets. NOTE(review): its meaning is not evident from the visible code - confirm. */
    private long n = -1L;
    /** Companion dataset forwarded unchanged to derived datasets; null for the context-only constructors. NOTE(review): purpose not evident from the visible code - confirm. */
    private final DistributedDataset sridd;
    /** Engine used to publish the current job description; may be null, in which case descriptions are skipped. */
    private final ComputeEngine engine;
    /** Execution context queried by key (e.g. "execution.hints", "numCores", "sid"). */
    private ComputeEngineContext ctx;
    /** True for datasets produced by a sort; explode, reduce and repartition reset it to false, other operations forward it. */
    private final boolean ordered;

    /**
     * Creates a dataset that wraps an existing RDD together with its bookkeeping state.
     *
     * @param engine engine used for job descriptions; may be null
     * @param ctx execution context
     * @param jsc Spark context
     * @param srRdd backing RDD; must not be null
     * @param sridd companion dataset, stored as given
     * @param fieldExtractionHint extraction hint to carry
     * @param n value stored as given and forwarded to derived datasets
     * @param reducedByFields reduce-by fields to carry
     * @param ordered whether the records of {@code srRdd} are considered ordered
     * @throws IllegalArgumentException if {@code srRdd} is null
     */
    public SparkDistributedDataset(ComputeEngine engine, ComputeEngineContext ctx, JavaSparkContext jsc, JavaRDD<SearchResult> srRdd, DistributedDataset sridd, FieldExtractor.ExtractionHint fieldExtractionHint, long n, HashSet<SearchResult.FieldMeta> reducedByFields, boolean ordered) {
        // Reject a missing RDD before touching any state.
        if (srRdd == null) {
            logger.error("srRdd cannot be null");
            throw new IllegalArgumentException("sr rdd cannot be null");
        }
        // Spark handles.
        this.jsc = jsc;
        this.srRdd = srRdd;
        // Engine and execution context.
        this.engine = engine;
        this.ctx = ctx;
        // Bookkeeping carried from dataset to dataset.
        this.sridd = sridd;
        this.n = n;
        this.fieldExtractionHint = fieldExtractionHint;
        this.reducedByFields = reducedByFields;
        this.ordered = ordered;
    }

    /**
     * Gives the static helpers of this class access to the backing RDD.
     *
     * @return the RDD wrapped by this dataset
     */
    private JavaRDD<SearchResult> getSrRdd() {
        final JavaRDD<SearchResult> backingRdd = this.srRdd;
        return backingRdd;
    }

    /**
     * Creates an empty, unordered dataset backed by an empty RDD obtained from {@code jsc}.
     *
     * <p>The dataset has no companion dataset, no reduce-by fields, an extraction hint of
     * {@code NOT_PRECOMPUTED}, and keeps the default value of {@code n}.
     *
     * @param engine engine used for job descriptions; may be null
     * @param ctx execution context
     * @param jsc Spark context used to create the empty RDD
     */
    public SparkDistributedDataset(ComputeEngine engine, ComputeEngineContext ctx, JavaSparkContext jsc) {
        this.engine = engine;
        this.srRdd = jsc.<SearchResult>emptyRDD();
        this.jsc = jsc;
        this.sridd = null;
        // Parameterized instead of the raw HashSet so the assignment is type-checked.
        this.reducedByFields = new HashSet<SearchResult.FieldMeta>();
        this.fieldExtractionHint = FieldExtractor.ExtractionHint.NOT_PRECOMPUTED;
        this.ctx = ctx;
        this.ordered = false;
    }

    /**
     * Creates an empty dataset that has no compute engine attached.
     *
     * @param ctx execution context
     * @param jsc Spark context used to create the empty RDD
     */
    public SparkDistributedDataset(ComputeEngineContext ctx, JavaSparkContext jsc) {
        this((ComputeEngine)null, ctx, jsc);
    }

    /**
     * Returns the debug string of the backing RDD.
     *
     * @return the value of {@code toDebugString()} on the backing RDD
     */
    @Override
    public String toString() {
        return this.srRdd.toDebugString();
    }

    /**
     * Collects the whole backing RDD into a local list and iterates over it.
     *
     * @return iterator over the collected records
     */
    @Override
    public Iterator<SearchResult> retrieve() {
        final List<SearchResult> collected = this.srRdd.collect();
        return collected.iterator();
    }

    /**
     * Applies {@code mapper} to every record of this dataset.
     *
     * @param mapper per-record transformation
     * @return a new dataset over the mapped records; {@code n}, the reduce-by fields and the ordered flag are forwarded
     */
    @Override
    public DistributedDataset transform(Mapper mapper) {
        final DistributedDataset mapped = _map(this.engine, this.ctx, this.jsc, this.srRdd, this.sridd, mapper, this.n, this.reducedByFields, this.ordered);
        return mapped;
    }

    /**
     * Builds a dataset whose records are the result of {@code mapper.map} on each input record.
     *
     * <p>The resulting dataset carries the extraction hint reported by the mapper; the other
     * bookkeeping values are passed through as given.
     */
    private static DistributedDataset _map(ComputeEngine engine, ComputeEngineContext ctx, JavaSparkContext jsc, JavaRDD<SearchResult> srRdd, DistributedDataset sridd, final Mapper mapper, long n, HashSet<SearchResult.FieldMeta> reducedByFields, boolean ordered) {
        // Adapter from the project's Mapper to Spark's Function.
        Function<SearchResult, SearchResult> applyMapper = new Function<SearchResult, SearchResult>(){
            private static final long serialVersionUID = 1L;

            @Override
            public SearchResult call(SearchResult record) throws Exception {
                return mapper.map(record);
            }
        };
        JavaRDD<SearchResult> mappedRdd = srRdd.map(applyMapper);
        FieldExtractor.ExtractionHint hint = mapper.fieldExtractionHint();
        return new SparkDistributedDataset(engine, ctx, jsc, mappedRdd, sridd, hint, n, reducedByFields, ordered);
    }

    /**
     * Sorts {@code srRdd} by the given ordering fields and returns a new dataset flagged as ordered.
     *
     * <p>Every record is paired with a key {@link SearchResult} built from its ordering-field values
     * (missing values become a {@code NullField}; values are converted to the field's declared type).
     * When a record limit applies (from {@code sortCmd.getCount()} and/or {@code headImposedLimit}),
     * the data is hash-partitioned and sorted within partitions, each partition is cut to the limit,
     * the survivors are sorted globally, and the top records are taken on the driver and
     * re-parallelized into one partition. Without a limit the whole dataset is sorted globally; if
     * execution hints are present the input is first counted and possibly coalesced.
     *
     * @param engine engine used to publish job descriptions; may be null
     * @param ctx execution context; queried for "execution.hints" and "numCores"
     * @param jsc Spark context used to re-parallelize limited results
     * @param srRdd records to sort
     * @param sridd companion dataset, forwarded unchanged
     * @param sortCmd sort command supplying the by-fields and count; may be null
     * @param orderingFields ordering fields used for the field metadata and for logging
     * @param fieldExtractionHint extraction hint, forwarded unchanged
     * @param n forwarded unchanged
     * @param reducedByFields reduce-by fields, forwarded unchanged; their number feeds the partition hint
     * @param headImposedLimit additional record limit; ignored unless positive
     * @return the sorted dataset
     */
    private static DistributedDataset _sort(ComputeEngine engine, ComputeEngineContext ctx, JavaSparkContext jsc, JavaRDD<SearchResult> srRdd, DistributedDataset sridd, SortCommand sortCmd, IOrdering[] orderingFields, FieldExtractor.ExtractionHint fieldExtractionHint, long n, HashSet<SearchResult.FieldMeta> reducedByFields, int headImposedLimit) {
        IOrdering[] fields = orderingFields;
        // NOTE(review): field metadata comes from orderingFields while the field types below come from
        // sortCmd.getBy() when a command is given; the visible callers pass the same array for both.
        final SearchResult.FieldMeta[] fieldsMeta = new SearchResult.FieldMeta[fields.length];
        for (int i = 0; i < fieldsMeta.length; ++i) {
            fieldsMeta[i] = SearchResult.FieldMeta.newFieldMeta(fields[i].getField().getFieldName());
        }
        long count = 0L;
        if (sortCmd != null) {
            fields = sortCmd.getBy();
            count = sortCmd.getCount();
        }
        if (headImposedLimit > 0) {
            long tempCount = count;
            count = count == 0L ? (long)headImposedLimit : Math.min(count, (long)headImposedLimit);
            logger.info(String.format("sdd sort: count updated from: %d to head imposed limit: %d", tempCount, count));
        }
        logger.info(String.format("Sort by fields: %s", Utils.printByFields(orderingFields)));
        final int maxSeed = (int)Utils.getMaxSortSeed();
        logger.info(String.format("will use max sort seed: %d", maxSeed));
        final IOrdering[] tempFields = fields;

        // Unlimited sort: size the input first so an overly sparse RDD can be coalesced.
        if (count == 0L && ctx.hasContext("execution.hints")) {
            ExecutionHints hints = (ExecutionHints)ctx.get("execution.hints");
            int numFields = reducedByFields.isEmpty() ? 2 : reducedByFields.size();
            // Spark refuses to change an already assigned storage level, so only persist when none
            // is set (same guard as cacheDisk()).
            if (srRdd.getStorageLevel().equals(StorageLevel.NONE())) {
                srRdd = srRdd.persist(StorageLevel.DISK_ONLY());
            } else {
                logger.warn("sdd sort: storage level already set on input rdd hence not enforcing another storage level");
            }
            if (engine != null) {
                engine.setCurrentJobDescription(String.format("counting input dataset before sorting using by fields: %s since all records considered", Utils.printByFields(orderingFields)));
            }
            long numRecs = srRdd.count();
            int requiredPars = hints.defaultPartitionCount(numRecs, numFields);
            int currPars = srRdd.getNumPartitions();
            if (Utils.isSparseDataset(currPars, requiredPars)) {
                if (ctx.hasContext("numCores")) {
                    int dfcNumCores = (Integer)ctx.get("numCores");
                    requiredPars = Math.max(dfcNumCores, requiredPars);
                }
                if (requiredPars < currPars) {
                    srRdd = srRdd.coalesce(requiredPars, false);
                    logger.info(String.format("coalescing the input rdd from: %d pars to: %d pars since rdd is too sparse which can slow down ordered retrieval of records to search head", currPars, requiredPars));
                }
            }
        }

        if (engine != null) {
            engine.setCurrentJobDescription(String.format("preparing dataset before sorting using by fields: %s", Utils.printByFields(orderingFields)));
        }
        // Pair every record with a key record that holds only the (type-converted) ordering values.
        JavaPairRDD<SearchResult, SearchResult> srPairRdd = srRdd.mapToPair(new PairFunction<SearchResult, SearchResult, SearchResult>(){
            private static final long serialVersionUID = 1L;

            @Override
            public Tuple2<SearchResult, SearchResult> call(SearchResult sr) throws Exception {
                int len = tempFields.length;
                SearchResult.FieldMeta[] fieldNames = new SearchResult.FieldMeta[len];
                Object[] fieldValues = new Object[len];
                SearchResult.SRHashMap<SearchResult.FieldMeta, Object> key = new SearchResult.SRHashMap<SearchResult.FieldMeta, Object>(len * 2);
                for (int i = 0; i < len; ++i) {
                    IOrdering field = tempFields[i];
                    SearchResult.FieldMeta fieldName = fieldsMeta[i];
                    Object val = sr.getFieldValue(fieldName);
                    if (val == null) {
                        val = new SearchResultFactory.NullField(false);
                    }
                    FieldType fieldType = field.getFieldType();
                    val = SearchResultFactory.getInstance().convertValue(val, fieldType);
                    key.put(fieldName, val);
                    fieldNames[i] = fieldName;
                    fieldValues[i] = val;
                }
                SearchResult keySr = SearchResultFactory.getInstance().createSearchResult(key, fieldNames, fieldValues);
                if (maxSeed > 0) {
                    // A fresh Random per record keeps the serialized closure free of shared RNG state.
                    keySr.setSeed(new Random().nextInt(maxSeed));
                }
                return new Tuple2<SearchResult, SearchResult>(keySr, sr);
            }
        });

        final long finalCount = count;
        SearchResultComparator comp = new SearchResultComparator(fields);
        JavaPairRDD<SearchResult, SearchResult> sortedSrPairRdd;
        if (finalCount > 0L) {
            logger.info(String.format("sdd sort: will sort within all partitions first and remove records which are below %d and different", finalCount));
            int numPars = srPairRdd.getNumPartitions();
            // sortCmd may be null when the limit comes only from headImposedLimit.
            if (sortCmd != null && sortCmd.getSource() instanceof JoinCommand) {
                double boost = Math.max(1.0, Utils.getPartitionBoostPostJoin());
                numPars = Math.max((int)((double)numPars * boost), 1);
                logger.info(String.format("setting partitions to: %d for per partition sort since post join", numPars));
            }
            logger.info(String.format("sdd sort: partitioning input dataset to: %d partitions before per partition sort", numPars));
            if (engine != null) {
                engine.setCurrentJobDescription(String.format("repartitioning and sorting within partitions: %s", Utils.printByFields(orderingFields)));
            }
            sortedSrPairRdd = srPairRdd.repartitionAndSortWithinPartitions(new HashPartitioner(numPars), comp);
            if (engine != null) {
                engine.setCurrentJobDescription(String.format("taking top %d records from each partition: %s", finalCount, Utils.printByFields(orderingFields)));
            }
            // Keep at most finalCount records of every (already sorted) partition.
            sortedSrPairRdd = sortedSrPairRdd.mapPartitionsToPair(new PairFlatMapFunction<Iterator<Tuple2<SearchResult, SearchResult>>, SearchResult, SearchResult>(){
                private static final long serialVersionUID = 1L;

                @Override
                public Iterator<Tuple2<SearchResult, SearchResult>> call(Iterator<Tuple2<SearchResult, SearchResult>> srKvs) throws Exception {
                    LinkedList<Tuple2<SearchResult, SearchResult>> filteredRecs = new LinkedList<Tuple2<SearchResult, SearchResult>>();
                    long counter = 0L;
                    while (srKvs.hasNext()) {
                        Tuple2<SearchResult, SearchResult> srKv = srKvs.next();
                        if (++counter > finalCount) break;
                        filteredRecs.add(srKv);
                    }
                    // The rest of the partition is drained on purpose, exactly as before.
                    while (srKvs.hasNext()) {
                        srKvs.next();
                    }
                    return filteredRecs.iterator();
                }
            }, true);
            if (engine != null) {
                engine.setCurrentJobDescription(String.format("global sort based on top %d records in each partition: %s", finalCount, Utils.printByFields(orderingFields)));
            }
            sortedSrPairRdd = sortedSrPairRdd.sortByKey(comp);
        } else {
            if (engine != null) {
                engine.setCurrentJobDescription(String.format("global sort with by fields: %s", Utils.printByFields(orderingFields)));
            }
            logger.info(String.format("sdd sort: sorting the whole dataset since sort output is: %d, number of partitions: %d", finalCount, srPairRdd.getNumPartitions()));
            sortedSrPairRdd = srPairRdd.sortByKey(comp);
        }

        // Drop the sort keys again.
        srRdd = sortedSrPairRdd.map(new Function<Tuple2<SearchResult, SearchResult>, SearchResult>(){
            private static final long serialVersionUID = 1L;

            @Override
            public SearchResult call(Tuple2<SearchResult, SearchResult> pair) throws Exception {
                return pair._2();
            }
        });
        if (count > 0L) {
            count = Math.min(Utils.getMaxLimitRecords(), count);
            logger.info(String.format("sort has been invoked with record limits: %d, hence dag: %s ------> would be executed to retrieve the required records", count, srRdd.toDebugString()));
            if (engine != null) {
                engine.setCurrentJobDescription(String.format("retrieving top %d sorted records", count));
            }
            // take() accepts an int; clamp instead of letting the narrowing cast overflow.
            int takeCount = (int)Math.min(count, (long)Integer.MAX_VALUE);
            List<SearchResult> srs = srRdd.take(takeCount);
            srRdd = jsc.parallelize(srs, 1);
            logger.info(String.format("Curtailed sort to: %d records and distributed back to workers", count));
        }
        return new SparkDistributedDataset(engine, ctx, jsc, srRdd, sridd, fieldExtractionHint, n, reducedByFields, true);
    }

    /**
     * Gives the static helpers of this class access to the Spark context.
     *
     * @return the Spark context this dataset was created with
     */
    private JavaSparkContext getJavaSparkContext() {
        final JavaSparkContext sparkContext = this.jsc;
        return sparkContext;
    }

    /**
     * Sorts this dataset by {@code fields} without a sort command and without a head limit.
     *
     * @param fields ordering fields
     * @return the sorted dataset
     */
    @Override
    public DistributedDataset sort(IOrdering[] fields) {
        final SortCommand noCommand = null;
        final int noHeadLimit = -1;
        return _sort(this.engine, this.ctx, this.jsc, this.srRdd, this.sridd, noCommand, fields, this.fieldExtractionHint, this.n, this.reducedByFields, noHeadLimit);
    }

    /**
     * Sorts this dataset as described by {@code sortCmd}, using its by-fields as ordering fields.
     *
     * @param sortCmd sort command; dereferenced here, so it must not be null
     * @param headImposedLimit additional record limit passed through to the sort
     * @return the sorted dataset
     */
    @Override
    public DistributedDataset sort(SortCommand sortCmd, int headImposedLimit) {
        final IOrdering[] byFields = sortCmd.getBy();
        return _sort(this.engine, this.ctx, this.jsc, this.srRdd, this.sridd, sortCmd, byFields, this.fieldExtractionHint, this.n, this.reducedByFields, headImposedLimit);
    }

    /**
     * Expands every record of this dataset through {@code exploder}.
     *
     * @param exploder per-record expansion
     * @return a new, unordered dataset over the expanded records
     */
    @Override
    public DistributedDataset transform(Exploder exploder) {
        final DistributedDataset exploded = _explode(this.engine, this.ctx, this.jsc, this.srRdd, this.sridd, exploder, this.n, this.reducedByFields);
        return exploded;
    }

    /**
     * Builds a dataset made of all records returned by {@code exploder.explode} for each input record.
     *
     * <p>The result carries the extraction hint reported by the exploder and is marked as not ordered.
     */
    private static DistributedDataset _explode(ComputeEngine engine, ComputeEngineContext ctx, JavaSparkContext jsc, JavaRDD<SearchResult> srRdd, DistributedDataset sridd, final Exploder exploder, long n, HashSet<SearchResult.FieldMeta> reducedByFields) {
        // Adapter from the project's Exploder to Spark's FlatMapFunction.
        FlatMapFunction<SearchResult, SearchResult> applyExploder = new FlatMapFunction<SearchResult, SearchResult>(){
            private static final long serialVersionUID = 1L;

            @Override
            public Iterator<SearchResult> call(SearchResult record) throws Exception {
                return exploder.explode(record);
            }
        };
        JavaRDD<SearchResult> explodedRdd = srRdd.flatMap(applyExploder);
        FieldExtractor.ExtractionHint hint = exploder.fieldExtractionHint();
        return new SparkDistributedDataset(engine, ctx, jsc, explodedRdd, sridd, hint, n, reducedByFields, false);
    }

    /**
     * Counts the records of the backing RDD.
     *
     * @return number of records
     */
    @Override
    public long count() {
        final long total = this.srRdd.count();
        return total;
    }

    /**
     * Marks the backing RDD to be cached via {@code cache()}, unless a storage level is already set.
     *
     * <p>Spark does not allow changing the storage level of an RDD once one has been assigned, so,
     * as in {@link #cacheDisk()}, an already persisted dataset is returned unchanged instead of
     * letting the call fail.
     *
     * @return this dataset if a storage level was already set, otherwise a new dataset over the cached RDD
     */
    @Override
    public DistributedDataset cacheMem() {
        if (!this.srRdd.getStorageLevel().equals(StorageLevel.NONE())) {
            logger.warn("storage level set hence not enforcing another storage level");
            return this;
        }
        JavaRDD<SearchResult> ret = this.srRdd.cache();
        return new SparkDistributedDataset(this.engine, this.ctx, this.jsc, ret, this.sridd, this.fieldExtractionHint, this.n, this.reducedByFields, this.ordered);
    }

    /**
     * Persists the backing RDD with a disk-based storage level, unless a storage level is already set.
     *
     * @return this dataset if a storage level was already set, otherwise a new dataset over the persisted RDD
     */
    @Override
    public DistributedDataset cacheDisk() {
        final boolean alreadyPersisted = !this.srRdd.getStorageLevel().equals(StorageLevel.NONE());
        if (alreadyPersisted) {
            logger.warn("storage level set hence not enforcing another storage level");
            return this;
        }
        // Arguments follow Spark's StorageLevel.apply(useDisk, useMemory, deserialized, replication).
        final StorageLevel diskLevel = StorageLevel.apply(true, false, true, 1);
        final JavaRDD<SearchResult> persisted = this.srRdd.persist(diskLevel);
        return new SparkDistributedDataset(this.engine, this.ctx, this.jsc, persisted, this.sridd, this.fieldExtractionHint, this.n, this.reducedByFields, this.ordered);
    }

    /**
     * Waits for the asynchronous action and returns its result, converting any failure into an
     * unchecked exception.
     *
     * @param hdl handle of the asynchronous Spark action
     * @return the value produced by the action
     * @throws RuntimeException wrapping whatever was thrown while waiting for or unboxing the result;
     *         if the wait was interrupted, the thread's interrupt flag is restored first
     */
    private static long getHandleException(JavaFutureAction<Long> hdl) {
        try {
            return hdl.get();
        }
        catch (InterruptedException e) {
            // Restore the interrupt status so callers further up can still observe the interruption.
            Thread.currentThread().interrupt();
            throw new RuntimeException(e);
        }
        catch (Throwable t) {
            throw new RuntimeException(t);
        }
    }

    /**
     * Reduces {@code dd} by the reducer's reduce-by fields and returns the merged result as a new,
     * unordered dataset.
     *
     * <p>Steps, in order:
     * <ol>
     *   <li>optionally limit the input ({@code reducer.limitResultset()});</li>
     *   <li>split every record into key/value pairs and pre-reduce them per partition;</li>
     *   <li>if {@code Utils.shrinkPostVolumeReduction} asks for it and execution hints are present,
     *       count the pre-reduced data and coalesce it (up to two times) to fewer partitions;</li>
     *   <li>hash-partition the pairs - with a boosted partition count after a join - and reduce by key;</li>
     *   <li>optionally run the reducer's down-reduce step and a second reduce by key;</li>
     *   <li>optionally gather metadata from the reduced data into an execution context
     *       (only {@code Long} values are kept);</li>
     *   <li>merge every key/value pair into a final record.</li>
     * </ol>
     *
     * @param engine engine used to publish job descriptions; may be null
     * @param ctx execution context; queried for "sid", "execution.hints" and "numCores"
     * @param dd input dataset; must be a {@code SparkDistributedDataset}
     * @param sridd companion dataset, forwarded unchanged
     * @param reducer reduction logic
     * @param n forwarded unchanged
     * @return the reduced dataset, carrying the reducer's reduce-by fields
     */
    private static DistributedDataset _reduce(ComputeEngine engine, ComputeEngineContext ctx, DistributedDataset dd, DistributedDataset sridd, final Reducer reducer, long n) {
        SparkDistributedDataset sdd = (SparkDistributedDataset)dd;
        if (reducer.limitResultset()) {
            HashMap<SearchResult.FieldMeta, Object> context = SplunkConfExtractor.extract(ctx);
            // NOTE(review): fails with a NullPointerException if the extracted conf has no LIST_MAX_SIZE entry.
            int limit = Integer.parseInt(context.get(LIST_MAX_SIZE).toString());
            sdd = (SparkDistributedDataset)sdd.limit(limit);
            logger.debug(String.format("Input dataset has been limited: %d", limit));
        }
        JavaSparkContext jsc = sdd.getJavaSparkContext();
        JavaRDD<SearchResult> srRdd = sdd.getSrRdd();
        String sid = (String)ctx.get("sid");
        final HashMap<SearchResult.FieldMeta, _Long> executionContext = new HashMap<SearchResult.FieldMeta, _Long>();
        HashSet<SearchResult.FieldMeta> reduceByFields = reducer.reduceByFields();
        logger.info(String.format("sdd stats: reduce by fields: %s", reduceByFields.toString()));
        if (engine != null) {
            engine.setCurrentJobDescription(String.format("preparing dataset for reduction, by fields: %s", reduceByFields.toString()));
        }

        // Split every record into key/value pairs and pre-reduce them inside each partition.
        JavaPairRDD<SearchResult, SearchResult> srPairRdd = srRdd.mapPartitionsToPair(new PairFlatMapFunction<Iterator<SearchResult>, SearchResult, SearchResult>(){
            private static final long serialVersionUID = 1L;

            @Override
            public Iterator<Tuple2<SearchResult, SearchResult>> call(Iterator<SearchResult> srs) throws Exception {
                HashMap<SearchResult, SearchResult> srKvsTbl = new HashMap<SearchResult, SearchResult>(100);
                while (srs.hasNext()) {
                    SearchResult sr = srs.next();
                    SearchResult.SRContext parCtx = sr.getContext();
                    List<Pair<SearchResult, SearchResult>> pairs = reducer.splits(sr);
                    int size = pairs.size();
                    for (int i = 0; i < size; ++i) {
                        Pair<SearchResult, SearchResult> pair = pairs.get(i);
                        // Counted for every split, including those whose key is null.
                        if (parCtx != null) {
                            parCtx.addSrsInPartition();
                        }
                        SearchResult key = pair.first();
                        if (key == null) continue;
                        SearchResult val = pair.second();
                        SearchResult calcVal = srKvsTbl.get(key);
                        if (calcVal == null) {
                            srKvsTbl.put(key, val);
                            continue;
                        }
                        srKvsTbl.put(key, reducer.reduce(calcVal, val));
                    }
                }
                ArrayList<Tuple2<SearchResult, SearchResult>> retArr = new ArrayList<Tuple2<SearchResult, SearchResult>>(srKvsTbl.size());
                for (Map.Entry<SearchResult, SearchResult> kv : srKvsTbl.entrySet()) {
                    retArr.add(new Tuple2<SearchResult, SearchResult>(kv.getKey(), kv.getValue()));
                }
                return retArr.iterator();
            }
        }, true);

        if (Utils.shrinkPostVolumeReduction(srPairRdd.getNumPartitions())) {
            logger.info(String.format("partition shrinking process triggered: %d partitions", srPairRdd.getNumPartitions()));
            if (ctx.hasContext("execution.hints")) {
                int initialPars = srPairRdd.getNumPartitions();
                int numCores = -1;
                if (ctx.hasContext("numCores")) {
                    numCores = (Integer)ctx.get("numCores");
                }
                // Persist before counting so the pre-reduced data is reused by the later stages.
                srPairRdd = srPairRdd.persist(StorageLevel.DISK_ONLY());
                JavaPairRDD<SearchResult, SearchResult> cachedRdd = srPairRdd;
                if (engine != null) {
                    engine.setCurrentJobDescription(String.format("pipeline shrinkage: by-fields: %s, num pars: %d, post volume reduce counting", reduceByFields.toString(), initialPars));
                }
                long firstReduceCount = srPairRdd.count();
                logger.info(String.format("pipeline shrinkage: initial volume reduction count: %d", firstReduceCount));
                ExecutionHints hints = (ExecutionHints)ctx.get("execution.hints");
                int requiredPars = hints.reducePartitionCount(firstReduceCount, reduceByFields.size());
                int initialRequiredPars = requiredPars;
                requiredPars = Math.min(requiredPars, Utils.getMaxShuffleTasksSupported());
                boolean compact = true;
                if (numCores > 0 && requiredPars <= numCores) {
                    compact = false;
                    logger.info(String.format("pipeline shrinkage: stopping further compaction since the dataset required pars: %d is already <= num cores: %d", requiredPars, numCores));
                    requiredPars = numCores;
                }
                if (compact && !Utils.compactReducePartitions()) {
                    compact = false;
                }
                if (compact) {
                    requiredPars = Utils.compactPartitions(requiredPars);
                }
                if (requiredPars < srPairRdd.getNumPartitions()) {
                    logger.info(String.format("pipeline shrinkage: will try to reduce and shrink pipelines as much as possible: required partitions: %d", requiredPars));
                    srPairRdd = srPairRdd.coalesce(requiredPars, false);
                    // Coalescing puts equal keys of former partitions together, so reduce per partition again.
                    srPairRdd = srPairRdd.mapPartitionsToPair(new PairFlatMapFunction<Iterator<Tuple2<SearchResult, SearchResult>>, SearchResult, SearchResult>(){
                        private static final long serialVersionUID = 1L;

                        @Override
                        public Iterator<Tuple2<SearchResult, SearchResult>> call(Iterator<Tuple2<SearchResult, SearchResult>> srKvs) throws Exception {
                            HashMap<SearchResult, SearchResult> srKvsTbl = new HashMap<SearchResult, SearchResult>(100);
                            while (srKvs.hasNext()) {
                                Tuple2<SearchResult, SearchResult> srKv = srKvs.next();
                                SearchResult key = srKv._1();
                                if (key == null) continue;
                                SearchResult currVal = srKvsTbl.get(key);
                                SearchResult newVal = srKv._2();
                                if (currVal == null) {
                                    srKvsTbl.put(key, newVal);
                                    continue;
                                }
                                srKvsTbl.put(key, reducer.reduce(currVal, newVal));
                            }
                            ArrayList<Tuple2<SearchResult, SearchResult>> retArr = new ArrayList<Tuple2<SearchResult, SearchResult>>(srKvsTbl.size());
                            for (Map.Entry<SearchResult, SearchResult> kv : srKvsTbl.entrySet()) {
                                retArr.add(new Tuple2<SearchResult, SearchResult>(kv.getKey(), kv.getValue()));
                            }
                            return retArr.iterator();
                        }
                    }, true);
                    if (compact) {
                        logger.info(String.format("will attempt second compaction, current pars %d", srPairRdd.getNumPartitions()));
                        srPairRdd = srPairRdd.persist(StorageLevel.DISK_ONLY());
                        if (engine != null) {
                            engine.setCurrentJobDescription(String.format("pipeline shrinkage: by-fields: %s, coalesced par count: %d, second reduction output counting", reduceByFields.toString(), srPairRdd.getNumPartitions()));
                        }
                        long secondReduceCount = srPairRdd.count();
                        // The second persisted RDD has been materialized by count(); release the first one.
                        cachedRdd.unpersist(false);
                        logger.info(String.format("pipeline shrinkage: second volume reduction count: %d, previous cached rdd unpersisted", secondReduceCount));
                        int secondRequiredPars = hints.reducePartitionCount(secondReduceCount, reduceByFields.size());
                        if (numCores > 0) {
                            if (secondRequiredPars < numCores) {
                                secondRequiredPars = numCores;
                            } else if (secondRequiredPars > numCores) {
                                secondRequiredPars = Utils.compactPartitions(secondRequiredPars);
                                secondRequiredPars = Math.max(secondRequiredPars, numCores);
                            }
                        }
                        logger.info(String.format("pipeline shrinkage: second required partiton count set to: %d", secondRequiredPars));
                        if (secondRequiredPars < srPairRdd.getNumPartitions()) {
                            srPairRdd = srPairRdd.coalesce(secondRequiredPars, false);
                            logger.info(String.format("pipeline shrinkage: coalesced dataset for second time to shrink the pipeline further: new pars: %d", srPairRdd.getNumPartitions()));
                        } else {
                            logger.info(String.format("pipeline shrinkage: did not shrink partitions further since: required pars: %d is >= num pars: %d", secondRequiredPars, srPairRdd.getNumPartitions()));
                        }
                        logger.info(String.format("pipeline shrinkage: final coalesced and per partiton reduced dataset: output pars: %d, reduced count: %d", srPairRdd.getNumPartitions(), secondReduceCount));
                    } else {
                        logger.info(String.format("pipeline shrinkage: will avoid second compaction: initial required pars: %d, num cores: %d", initialRequiredPars, numCores));
                    }
                } else {
                    logger.info(String.format("although execution attempted to - but could not shrink pipeline because required pars: %d >= num pars: %d", requiredPars, srPairRdd.getNumPartitions()));
                }
            } else {
                logger.warn("although the execution attempted to - but could not shrink pipeline since execution hints not present");
            }
        }

        // Partition count for the shuffle; boosted after a join. The boosted value was previously
        // computed and logged but never handed to the partitioner.
        int numPars = srPairRdd.getNumPartitions();
        if (reducer.postJoin()) {
            double boost = Math.max(1.0, Utils.getPartitionBoostPostJoin());
            numPars = Math.max((int)((double)numPars * boost), 1);
            logger.info(String.format("reduce partition boosted to: %d since this is post join processing", numPars));
        }
        srPairRdd = srPairRdd.partitionBy(new HashPartitioner(numPars));
        JavaPairRDD<SearchResult, SearchResult> reducedSrPairRdd = srPairRdd.reduceByKey(new Function2<SearchResult, SearchResult, SearchResult>(){
            private static final long serialVersionUID = 1L;

            @Override
            public SearchResult call(SearchResult lhs, SearchResult rhs) throws Exception {
                return reducer.reduce(lhs, rhs);
            }
        });

        if (reducer.downReduce()) {
            // Re-key every reduced pair through the reducer and reduce once more on the new keys.
            reducedSrPairRdd = reducedSrPairRdd.mapToPair(new PairFunction<Tuple2<SearchResult, SearchResult>, SearchResult, SearchResult>(){
                private static final long serialVersionUID = 1L;

                @Override
                public Tuple2<SearchResult, SearchResult> call(Tuple2<SearchResult, SearchResult> srKv) throws Exception {
                    Pair<SearchResult, SearchResult> downKV = reducer.downProcess(srKv._1(), srKv._2());
                    return new Tuple2<SearchResult, SearchResult>(downKV.first(), downKV.second());
                }
            });
            reducedSrPairRdd = reducedSrPairRdd.reduceByKey(new Function2<SearchResult, SearchResult, SearchResult>(){
                private static final long serialVersionUID = 1L;

                @Override
                public SearchResult call(SearchResult lhs, SearchResult rhs) throws Exception {
                    return reducer.reduce(lhs, rhs);
                }
            });
            logger.info("updated dag to trigger down reduce computation for non group by cases");
        }

        if (reducer.metadataRequired()) {
            JavaPairRDD<SearchResult.FieldMeta, SearchResult> metadataRDD = reducedSrPairRdd.flatMapToPair(new PairFlatMapFunction<Tuple2<SearchResult, SearchResult>, SearchResult.FieldMeta, SearchResult>(){
                private static final long serialVersionUID = 1L;

                @Override
                public Iterator<Tuple2<SearchResult.FieldMeta, SearchResult>> call(Tuple2<SearchResult, SearchResult> record) throws Exception {
                    return reducer.gatherMetadataOnReducedData(record._1(), record._2());
                }
            });
            if (!metadataRDD.isEmpty()) {
                JavaPairRDD<SearchResult.FieldMeta, SearchResult> finalizedMetadataRDD = metadataRDD.reduceByKey(new Function2<SearchResult, SearchResult, SearchResult>(){
                    private static final long serialVersionUID = 1L;

                    @Override
                    public SearchResult call(SearchResult v1, SearchResult v2) throws Exception {
                        return reducer.finalizeMetadata(v1, v2);
                    }
                });
                List<Tuple2<SearchResult.FieldMeta, SearchResult>> metadataList = finalizedMetadataRDD.collect();
                for (Tuple2<SearchResult.FieldMeta, SearchResult> mdTuple : metadataList) {
                    SearchResult.FieldMeta key = mdTuple._1();
                    Object value = mdTuple._2().getFieldValue(key);
                    logger.info("Metadata Key=" + key + ", Value=" + value);
                    // Only Long metadata values are passed on to the merge step.
                    if (value instanceof Long) {
                        executionContext.put(key, new _Long((long)((Long)value)));
                        continue;
                    }
                    logger.warn("Entry has not been added into the execution context");
                }
            }
        }
        logger.debug("ExecutionContext:::" + executionContext);
        logger.info(String.format("sdd reduce: applied reduce operation: sid: %s", sid));

        // Merge key and reduced value of every pair into the final record.
        srRdd = reducedSrPairRdd.map(new Function<Tuple2<SearchResult, SearchResult>, SearchResult>(){
            private static final long serialVersionUID = 1L;

            @Override
            public SearchResult call(Tuple2<SearchResult, SearchResult> pair) throws Exception {
                return reducer.merge(pair._1(), pair._2(), executionContext);
            }
        });
        return new SparkDistributedDataset(engine, ctx, jsc, srRdd, sridd, FieldExtractor.ExtractionHint.NOT_PRECOMPUTED, n, reduceByFields, false);
    }

    /**
     * Applies the given reducer to this dataset.
     *
     * <p>Pure delegation to the static {@code _reduce} helper, which receives this
     * dataset's engine, context, {@code sridd} and {@code n} alongside the dataset itself.
     *
     * @param reducer the reduce operation to apply
     * @return whatever {@code _reduce} produces for this dataset
     */
    @Override
    public DistributedDataset transform(Reducer reducer) {
        final DistributedDataset reduced = SparkDistributedDataset._reduce(this.engine, this.ctx, this, this.sridd, reducer, this.n);
        return reduced;
    }

    /**
     * Returns a dataset whose underlying RDD has exactly {@code pars} partitions.
     *
     * <p>When the RDD already has that many partitions this instance is returned
     * unchanged. Otherwise the RDD is repartitioned and wrapped in a new dataset that
     * carries over this dataset's engine, contexts, extraction hint, {@code n} and
     * reduced-by fields; the trailing constructor flag is passed as {@code false}.
     *
     * @param pars the desired partition count
     * @return this dataset, or a new repartitioned dataset
     */
    @Override
    public DistributedDataset repartition(int pars) {
        if (this.srRdd.getNumPartitions() != pars) {
            return new SparkDistributedDataset(this.engine, this.ctx, this.jsc, (JavaRDD<SearchResult>)this.srRdd.repartition(pars), this.sridd, this.fieldExtractionHint, this.n, this.reducedByFields, false);
        }
        return this;
    }

    /**
     * Takes at most {@code n} records from {@code srRdd} onto the driver and
     * parallelizes them back into a new dataset.
     *
     * <p>A limit that is non-positive or larger than {@link Utils#getMaxLimitRecords()}
     * is replaced by that maximum. If {@code srdd} reports itself as ordered the taken
     * records are placed in a single partition; otherwise the partition count comes
     * from the {@code "execution.hints"} context entry (defaulting to 1 when absent),
     * using the {@code "numfields"} context entry (defaulting to 2).
     *
     * @param engine              compute engine used to label the Spark job; may be {@code null}
     * @param ctx                 engine context supplying {@code sid}, hints and field count
     * @param jsc                 Spark context used to parallelize the taken records
     * @param srdd                dataset whose {@code ordered()} flag selects the partitioning strategy
     * @param srRdd               RDD to take records from
     * @param sridd               passed through to the resulting dataset
     * @param n                   requested record limit
     * @param fieldExtractionHint passed through to the resulting dataset
     * @param reducedByFields     passed through to the resulting dataset
     * @param ordered             ordered flag given to the resulting dataset
     * @return a new dataset holding the limited records
     * @throws RuntimeException if taking or re-parallelizing the records fails; the cause is preserved
     */
    private static DistributedDataset _limit(ComputeEngine engine, ComputeEngineContext ctx, JavaSparkContext jsc, DistributedDataset srdd, JavaRDD<SearchResult> srRdd, DistributedDataset sridd, long n, FieldExtractor.ExtractionHint fieldExtractionHint, HashSet<SearchResult.FieldMeta> reducedByFields, boolean ordered) {
        long max = Utils.getMaxLimitRecords();
        if (n <= 0L || n > max) {
            long tempN = n;
            n = max;
            logger.warn((Object)String.format("sdd limit: set limit records to: %d since input limit: %d is not valid", n, tempN));
        }
        String sid = (String)ctx.get("sid");
        logger.info((Object)String.format("dag received by limit:\n %s, sid: %s", srRdd.toDebugString(), sid));
        try {
            if (engine != null) {
                engine.setCurrentJobDescription(String.format("retrieving top %d records", n));
            }
            // take() accepts an int; clamp instead of narrowing blindly so a limit above
            // Integer.MAX_VALUE cannot wrap around to a negative or truncated count.
            int takeCount = (int)Math.min(n, (long)Integer.MAX_VALUE);
            List<SearchResult> srsList = srRdd.take(takeCount);
            logger.info((Object)String.format("sdd limit: fetched %d records from dataset based on limit n (or count of records whichever is lower), number of records in list: %d", n, srsList.size()));
            if (srdd.ordered()) {
                // A single partition keeps the driver-side order of the taken records.
                logger.info((Object)String.format("sdd limit: since dataset is ordered - taken %d search results before parallelizing back again into a single partition", srsList.size()));
                srRdd = jsc.parallelize(srsList, 1);
            } else {
                logger.info((Object)"sdd limit: unorderd limit case, hence partitioning as required");
                ExecutionHints hints = null;
                if (ctx.hasContext("execution.hints")) {
                    hints = (ExecutionHints)ctx.get("execution.hints");
                }
                int numFields = 2;
                if (ctx.hasContext("numfields")) {
                    numFields = (Integer)ctx.get("numfields");
                }
                int numPars = 1;
                if (hints != null) {
                    numPars = hints.defaultPartitionCount(n, numFields);
                } else {
                    logger.info((Object)String.format("Execution hints not found hence defaulting to %d", numPars));
                }
                srRdd = jsc.parallelize(srsList, numPars);
                logger.info((Object)String.format("sdd limit: unordered input records parallelized back to the workers into %d partitions, num fields use: %d, number of records shuffled: %d", numPars, numFields, srsList.size()));
            }
        }
        catch (Throwable t) {
            logger.error((Object)String.format("Error in retrieving srs, reason: %s", t.getMessage()), t);
            throw new RuntimeException(String.format("error retrieving search results: %s", t.getMessage()), t);
        }
        return new SparkDistributedDataset(engine, ctx, jsc, (JavaRDD<SearchResult>)srRdd, sridd, fieldExtractionHint, n, reducedByFields, ordered);
    }

    /**
     * Joins this dataset (as the left-hand side) with {@code rhsdd}.
     *
     * <p>Pure delegation to the static {@code _join} helper, which receives this
     * dataset's engine, context, Spark context and {@code n}.
     *
     * @param rhsdd          right-hand dataset
     * @param jcmd           join command describing the join
     * @param lhsRepartition forwarded unchanged to {@code _join}
     * @param rhsRepartition forwarded unchanged to {@code _join}
     * @return the dataset produced by {@code _join}
     */
    @Override
    public DistributedDataset join(DistributedDataset rhsdd, JoinCommand jcmd, boolean lhsRepartition, boolean rhsRepartition) {
        final DistributedDataset joined = SparkDistributedDataset._join(this.engine, this.ctx, this.jsc, this, rhsdd, jcmd, this.n, lhsRepartition, rhsRepartition);
        return joined;
    }

    /**
     * Builds the join key for one search result.
     *
     * <p>For each join position the value is read from {@code sr} under the actual
     * field name of the chosen side and stored under that side's symbolic field name.
     * A missing value is replaced by {@code new SearchResultFactory.NullField(true)}.
     * When {@code isRegularFsh} is set, {@link Number} and
     * {@code SearchResultFactory.StringField} values are replaced by their
     * {@code toString()} form.
     *
     * @param lhs                     {@code true} to use the left-side name arrays, {@code false} for the right
     * @param isRegularFsh            whether numeric/string-field values are normalised to strings
     * @param sr                      search result to read the key values from
     * @param leftActualFieldNames    left-side field names as present in the result; its length fixes the key size
     * @param leftSymbolicFieldNames  left-side names used inside the key
     * @param rightActualFieldNames   right-side field names as present in the result
     * @param rightSymbolicFieldNames right-side names used inside the key
     * @return a search result holding only the key fields
     */
    private static SearchResult createKey(boolean lhs, boolean isRegularFsh, SearchResult sr, SearchResult.FieldMeta[] leftActualFieldNames, SearchResult.FieldMeta[] leftSymbolicFieldNames, SearchResult.FieldMeta[] rightActualFieldNames, SearchResult.FieldMeta[] rightSymbolicFieldNames) {
        final int keySize = leftActualFieldNames.length;
        final SearchResult.FieldMeta[] actualNames = lhs ? leftActualFieldNames : rightActualFieldNames;
        final SearchResult.FieldMeta[] symbolicNames = lhs ? leftSymbolicFieldNames : rightSymbolicFieldNames;

        SearchResult.SRHashMap<SearchResult.FieldMeta, Object> keyMap = new SearchResult.SRHashMap<SearchResult.FieldMeta, Object>(keySize * 2);
        SearchResult.FieldMeta[] keyNames = new SearchResult.FieldMeta[keySize];
        Object[] keyValues = new Object[keySize];

        for (int pos = 0; pos < keySize; pos++) {
            SearchResult.FieldMeta actual = actualNames[pos];
            SearchResult.FieldMeta symbolic = symbolicNames[pos];
            Object value = sr.getFieldValue(actual);
            if (value == null) {
                value = new SearchResultFactory.NullField(true);
            } else if (isRegularFsh && (value instanceof Number || value instanceof SearchResultFactory.StringField)) {
                value = value.toString();
            }
            keyNames[pos] = symbolic;
            keyValues[pos] = value;
            keyMap.put(symbolic, value);
        }
        return SearchResultFactory.getInstance().createSearchResult(keyMap, keyNames, keyValues);
    }

    /**
     * Counts how many records share each key.
     *
     * <p>Every record is mapped to {@code (key, 1)} and the ones are then summed per
     * key with {@code reduceByKey}.
     *
     * @param srPairRdd keyed records
     * @return a pair RDD of key to number of occurrences
     */
    private static JavaPairRDD<SearchResult, Long> getKeyCounts(JavaPairRDD<SearchResult, SearchResult> srPairRdd) {
        PairFunction<Tuple2<SearchResult, SearchResult>, SearchResult, Long> keyWithOne = new PairFunction<Tuple2<SearchResult, SearchResult>, SearchResult, Long>(){
            private static final long serialVersionUID = 1L;

            public Tuple2<SearchResult, Long> call(Tuple2<SearchResult, SearchResult> record) throws Exception {
                return new Tuple2<SearchResult, Long>(record._1(), 1L);
            }
        };
        Function2<Long, Long, Long> addCounts = new Function2<Long, Long, Long>(){
            private static final long serialVersionUID = 1L;

            public Long call(Long left, Long right) throws Exception {
                return left + right;
            }
        };
        return srPairRdd.mapToPair(keyWithOne).reduceByKey(addCounts);
    }

    /**
     * Spreads records of high-cartesian join keys over several sub-keys ("seeds").
     *
     * <p>For every key present in {@code seeds} one side of the join is the seeded side:
     * the left side when {@code lhsCount >= rhsCount}, otherwise the right side.
     * <ul>
     *   <li>On the seeded side each record's key gets a random seed value in
     *       {@code [0, seed)} via {@code addSeed}.</li>
     *   <li>On the other side each record is emitted {@code seed} times, once per seed
     *       value, so that every seeded key still finds its match.</li>
     * </ul>
     * Records whose key is not in {@code seeds} pass through untouched. The RDD is
     * partitioned with {@code p} after each of the two steps.
     *
     * <p>The seed table is read from the broadcast variable inside the task functions.
     * Copying it into a field of the function object (as was done before) serializes
     * the whole map into every task closure and defeats the broadcast.
     *
     * @param engine  compute engine used to label the Spark job; may be {@code null}
     * @param jsc     Spark context used to broadcast {@code seeds}
     * @param pairRdd keyed records of one join side
     * @param seeds   per-key tuple of (lhs count, rhs count, cartesian, seed)
     * @param p       partitioner applied after seeding and after replication
     * @param lhs     {@code true} when {@code pairRdd} is the left join side
     * @return the seeded / replicated and re-partitioned pair RDD
     */
    private static JavaPairRDD<SearchResult, SearchResult> applySeed(ComputeEngine engine, JavaSparkContext jsc, JavaPairRDD<SearchResult, SearchResult> pairRdd, HashMap<SearchResult, Tuple4<Long, Long, Long, Integer>> seeds, Partitioner p, final boolean lhs) {
        final Broadcast<HashMap<SearchResult, Tuple4<Long, Long, Long, Integer>>> seedsbc = jsc.broadcast(seeds);
        if (engine != null) {
            engine.setCurrentJobDescription("applying seeds for high cartesian join clause keys");
        }
        JavaPairRDD<SearchResult, SearchResult> justSeed = pairRdd.mapToPair(new PairFunction<Tuple2<SearchResult, SearchResult>, SearchResult, SearchResult>(){
            private static final long serialVersionUID = 1L;

            public Tuple2<SearchResult, SearchResult> call(Tuple2<SearchResult, SearchResult> sr) throws Exception {
                SearchResult key = sr._1();
                Tuple4<Long, Long, Long, Integer> seedInfo = seedsbc.getValue().get(key);
                if (seedInfo == null) {
                    return sr;
                }
                long lhsCount = seedInfo._1();
                long rhsCount = seedInfo._2();
                boolean seededSide = lhs ? lhsCount >= rhsCount : rhsCount > lhsCount;
                if (!seededSide) {
                    // The other side is replicated in the next step; leave the key as is.
                    return sr;
                }
                int seed = seedInfo._4();
                // Only draw a random value for records that are actually seeded, and
                // avoid allocating a fresh Random per record.
                int seedVal = java.util.concurrent.ThreadLocalRandom.current().nextInt(seed);
                return new Tuple2<SearchResult, SearchResult>(SparkDistributedDataset.addSeed(key, seedVal), sr._2());
            }
        });
        justSeed = justSeed.partitionBy(p);
        JavaPairRDD<SearchResult, SearchResult> repeats = justSeed.flatMapToPair(new PairFlatMapFunction<Tuple2<SearchResult, SearchResult>, SearchResult, SearchResult>(){
            private static final long serialVersionUID = 1L;

            public Iterator<Tuple2<SearchResult, SearchResult>> call(final Tuple2<SearchResult, SearchResult> sr) throws Exception {
                final SearchResult key = sr._1();
                // Already seeded in the previous step: emit once.
                if (key.containsField(ComputeEngineConstants.SEED_VALUE_KEY)) {
                    return java.util.Collections.singletonList(sr).iterator();
                }
                Tuple4<Long, Long, Long, Integer> seedInfo = seedsbc.getValue().get(key);
                if (seedInfo == null) {
                    return java.util.Collections.singletonList(sr).iterator();
                }
                long lhsCount = seedInfo._1();
                long rhsCount = seedInfo._2();
                boolean seededSide = lhs ? lhsCount >= rhsCount : rhsCount > lhsCount;
                if (seededSide) {
                    return java.util.Collections.singletonList(sr).iterator();
                }
                final int copies = seedInfo._4();
                // Lazily emit one copy per seed value, counting down from copies - 1 to 0.
                return new Iterator<Tuple2<SearchResult, SearchResult>>(){
                    private int remaining = copies;

                    @Override
                    public boolean hasNext() {
                        return this.remaining > 0;
                    }

                    @Override
                    public Tuple2<SearchResult, SearchResult> next() {
                        if (this.remaining <= 0) {
                            throw new java.util.NoSuchElementException();
                        }
                        --this.remaining;
                        SearchResult seededKey = SparkDistributedDataset.addSeed(key, this.remaining);
                        return new Tuple2<SearchResult, SearchResult>(seededKey, sr._2());
                    }

                    @Override
                    public void remove() {
                        throw new UnsupportedOperationException("remove not supported");
                    }
                };
            }
        });
        repeats = repeats.partitionBy(p);
        return repeats;
    }

    /**
     * Computes the number of records held by each partition of {@code srPairRdd}.
     *
     * <p>Each partition is walked once to count its records, the counts are summed per
     * partition index, and the pairs are flipped to {@code (count, partitionIndex)} and
     * sorted by count in descending order.
     *
     * @param srPairRdd the pair RDD to inspect
     * @return {@code (record count, partition index)} pairs, largest count first
     */
    private static JavaPairRDD<Long, Integer> getPartitionCounts(JavaPairRDD<SearchResult, SearchResult> srPairRdd) {
        Function2<Integer, Iterator<Tuple2<SearchResult, SearchResult>>, Iterator<Tuple2<Integer, Long>>> countPartition = new Function2<Integer, Iterator<Tuple2<SearchResult, SearchResult>>, Iterator<Tuple2<Integer, Long>>>(){
            private static final long serialVersionUID = 1L;

            public Iterator<Tuple2<Integer, Long>> call(Integer parId, Iterator<Tuple2<SearchResult, SearchResult>> srs) throws Exception {
                long seen = 0L;
                for (; srs.hasNext(); srs.next()) {
                    seen++;
                }
                ArrayList<Tuple2<Integer, Long>> single = new ArrayList<Tuple2<Integer, Long>>();
                single.add(new Tuple2<Integer, Long>(parId, seen));
                return single.iterator();
            }
        };
        PairFunction<Tuple2<Integer, Long>, Integer, Long> asPair = new PairFunction<Tuple2<Integer, Long>, Integer, Long>(){
            private static final long serialVersionUID = 1L;

            public Tuple2<Integer, Long> call(Tuple2<Integer, Long> parCount) throws Exception {
                return parCount;
            }
        };
        Function2<Long, Long, Long> addCounts = new Function2<Long, Long, Long>(){
            private static final long serialVersionUID = 1L;

            public Long call(Long lhs, Long rhs) throws Exception {
                return lhs + rhs;
            }
        };
        PairFunction<Tuple2<Integer, Long>, Long, Integer> flip = new PairFunction<Tuple2<Integer, Long>, Long, Integer>(){
            private static final long serialVersionUID = 1L;

            public Tuple2<Long, Integer> call(Tuple2<Integer, Long> parCount) throws Exception {
                return new Tuple2<Long, Integer>(parCount._2(), parCount._1());
            }
        };
        JavaRDD<Tuple2<Integer, Long>> perPartition = srPairRdd.mapPartitionsWithIndex(countPartition, true);
        JavaPairRDD<Integer, Long> summed = perPartition.mapToPair(asPair).reduceByKey(addCounts);
        // false = descending, so the fullest partitions come first.
        return summed.mapToPair(flip).sortByKey(false);
    }

    /**
     * Logs, at debug level, the first {@code top} entries of {@code countPars}.
     *
     * <p>Each entry is read as {@code (count, partition id)} and printed as
     * partition id followed by count.
     *
     * @param countPars {@code (count, partition id)} pairs
     * @param top       how many entries to take and log
     */
    private static void printPartitionCounts(JavaPairRDD<Long, Integer> countPars, int top) {
        for (Tuple2<Long, Integer> entry : countPars.take(top)) {
            Integer partitionId = entry._2();
            Long recordCount = entry._1();
            logger.debug((Object)String.format("sdd: partition id: %d -> count: %d", partitionId, recordCount));
        }
    }

    /**
     * Returns the cartesian-product threshold read from the
     * {@code MAX_JOIN_CARTESIAN_ALLOWED} environment variable.
     *
     * <p>The default of 10,000,000 is used when the variable is unset, blank, not a
     * valid {@code long}, or not strictly positive. A non-positive value is rejected
     * because the result is used as a divisor when join seeds are calculated. The
     * value is trimmed before parsing so that surrounding whitespace is tolerated.
     *
     * @return the maximum cartesian allowed; always greater than zero
     */
    private static long getMaxJoinCartesianAllowed() {
        long maxCart = 10000000L;
        String maxCartStr = System.getenv("MAX_JOIN_CARTESIAN_ALLOWED");
        if (maxCartStr != null && !maxCartStr.trim().isEmpty()) {
            try {
                long parsed = Long.parseLong(maxCartStr.trim());
                if (parsed > 0L) {
                    maxCart = parsed;
                } else {
                    logger.warn((Object)String.format("ignoring non-positive value for envvar: %s: %d, using default: %d", "MAX_JOIN_CARTESIAN_ALLOWED", parsed, maxCart));
                }
            }
            catch (NumberFormatException e) {
                logger.warn((Object)String.format("could not parse envvar: %s to a number: %s, using default: %d", "MAX_JOIN_CARTESIAN_ALLOWED", maxCartStr, maxCart));
            }
        }
        logger.info((Object)String.format("Max join cartesian per partition: %d", maxCart));
        return maxCart;
    }

    /**
     * Estimates the size of a join and finds the keys that need seeding.
     *
     * <p>Per-key occurrence counts of both sides are inner-joined. For each common key
     * the cartesian {@code lhsCount * rhsCount} is computed and a seed is derived: 1
     * when the cartesian is below {@link #getMaxJoinCartesianAllowed()}, otherwise
     * {@code floor(sqrt(cartesian / max + 1)) + 1}. Two Spark actions are then run:
     * one sums all cartesians into the estimated join count, the other collects the
     * keys whose seed is greater than 1.
     *
     * <p>The product and the running sum saturate at {@link Long#MAX_VALUE} instead of
     * wrapping around. A wrapped (negative) value would look like a small cartesian and
     * skip both seeding and any later size check on the estimate. The intermediate
     * stats RDD is persisted to disk for the two actions and released afterwards.
     *
     * @param engine     compute engine used to label the Spark jobs; may be {@code null}
     * @param lhsPairRdd keyed records of the left join side
     * @param rhsPairRdd keyed records of the right join side
     * @return pair of (keys requiring a seed with their (lhs count, rhs count,
     *         cartesian, seed) tuple, estimated join count); the count is {@code -1}
     *         when the two sides have no key in common
     */
    private static Pair<List<Tuple2<SearchResult, Tuple4<Long, Long, Long, Integer>>>, Long> calculateJoinStats(ComputeEngine engine, JavaPairRDD<SearchResult, SearchResult> lhsPairRdd, JavaPairRDD<SearchResult, SearchResult> rhsPairRdd) {
        JavaPairRDD<SearchResult, Long> lhsKeyCounts = SparkDistributedDataset.getKeyCounts(lhsPairRdd);
        JavaPairRDD<SearchResult, Long> rhsKeyCounts = SparkDistributedDataset.getKeyCounts(rhsPairRdd);
        JavaPairRDD<SearchResult, Tuple2<Long, Long>> joinedKeyCounts = lhsKeyCounts.join(rhsKeyCounts);
        final long maxCartAllowed = SparkDistributedDataset.getMaxJoinCartesianAllowed();
        JavaPairRDD<SearchResult, Tuple4<Long, Long, Long, Integer>> joinStats = joinedKeyCounts.mapToPair(new PairFunction<Tuple2<SearchResult, Tuple2<Long, Long>>, SearchResult, Tuple4<Long, Long, Long, Integer>>(){
            private static final long serialVersionUID = 1L;

            public Tuple2<SearchResult, Tuple4<Long, Long, Long, Integer>> call(Tuple2<SearchResult, Tuple2<Long, Long>> keyCounts) throws Exception {
                long lhsCount = keyCounts._2()._1();
                long rhsCount = keyCounts._2()._2();
                // Saturating multiply: both counts are occurrence counts (non-negative).
                long cartesian = rhsCount != 0L && lhsCount > Long.MAX_VALUE / rhsCount ? Long.MAX_VALUE : lhsCount * rhsCount;
                if (cartesian < maxCartAllowed) {
                    return new Tuple2<SearchResult, Tuple4<Long, Long, Long, Integer>>(keyCounts._1(), new Tuple4<Long, Long, Long, Integer>(lhsCount, rhsCount, cartesian, 1));
                }
                long times = cartesian / maxCartAllowed;
                // Done in double/long and clamped so extreme ratios cannot overflow the int seed.
                long seed = (long)Math.sqrt((double)times + 1.0) + 1L;
                int boundedSeed = (int)Math.min(seed, (long)Integer.MAX_VALUE);
                return new Tuple2<SearchResult, Tuple4<Long, Long, Long, Integer>>(keyCounts._1(), new Tuple4<Long, Long, Long, Integer>(lhsCount, rhsCount, cartesian, boundedSeed));
            }
        });
        // Persisted because two separate actions (the sum and the seed filter) read it.
        joinStats = joinStats.persist(StorageLevel.DISK_ONLY());
        try {
            JavaPairRDD<Integer, Long> cartesians = joinStats.mapToPair(new PairFunction<Tuple2<SearchResult, Tuple4<Long, Long, Long, Integer>>, Integer, Long>(){
                private static final long serialVersionUID = 1L;

                public Tuple2<Integer, Long> call(Tuple2<SearchResult, Tuple4<Long, Long, Long, Integer>> seedInfo) throws Exception {
                    // Constant key so that reduceByKey folds everything into one total.
                    return new Tuple2<Integer, Long>(1, seedInfo._2()._3());
                }
            });
            cartesians = cartesians.reduceByKey(new Function2<Long, Long, Long>(){
                private static final long serialVersionUID = 1L;

                public Long call(Long lhsCart, Long rhsCart) throws Exception {
                    long sum = lhsCart + rhsCart;
                    // Both operands are non-negative, so a negative sum means overflow.
                    return sum < 0L ? Long.MAX_VALUE : sum;
                }
            });
            long joinCount = -1L;
            if (engine != null) {
                engine.setCurrentJobDescription("retrieving join cartesian stats");
            }
            List<Tuple2<Integer, Long>> cartesianVals = cartesians.collect();
            if (!cartesianVals.isEmpty()) {
                joinCount = cartesianVals.get(0)._2();
                logger.info((Object)String.format("Estimated total count of the join: %d", joinCount));
            } else {
                logger.info((Object)"Estimated join count is either zero or a very small number");
            }
            JavaPairRDD<SearchResult, Tuple4<Long, Long, Long, Integer>> filteredSeeds = joinStats.filter(new Function<Tuple2<SearchResult, Tuple4<Long, Long, Long, Integer>>, Boolean>(){
                private static final long serialVersionUID = 1L;

                public Boolean call(Tuple2<SearchResult, Tuple4<Long, Long, Long, Integer>> seedInfo) throws Exception {
                    return seedInfo._2()._4() > 1;
                }
            });
            if (engine != null) {
                engine.setCurrentJobDescription(String.format("retrieving join clause keys which require seeds, join count: %d", joinCount));
            }
            List<Tuple2<SearchResult, Tuple4<Long, Long, Long, Integer>>> filteredSeedsLocal = filteredSeeds.collect();
            return new Pair<List<Tuple2<SearchResult, Tuple4<Long, Long, Long, Integer>>>, Long>(filteredSeedsLocal, joinCount);
        }
        finally {
            // Both results are now local to the driver; release the cached blocks.
            joinStats.unpersist();
        }
    }

    /**
     * Returns the maximum join count read from the {@code DFS_MAX_JOIN_COUNT}
     * environment variable.
     *
     * <p>The default of 10,000,000,000 is used when the variable is unset, blank or
     * not a valid {@code long}. The value is trimmed before parsing so that
     * surrounding whitespace is tolerated; an unparsable value is logged and ignored
     * rather than aborting the caller with a {@link NumberFormatException}.
     *
     * @return the maximum join count allowed
     */
    private static long getMaxJoinCountAllowed() {
        long maxJoinCount = 10000000000L;
        String maxJoinCountStr = System.getenv("DFS_MAX_JOIN_COUNT");
        if (maxJoinCountStr != null && !maxJoinCountStr.trim().isEmpty()) {
            try {
                maxJoinCount = Long.parseLong(maxJoinCountStr.trim());
                logger.debug((Object)String.format("max join count set through env var: %s", maxJoinCountStr));
            }
            catch (NumberFormatException e) {
                logger.warn((Object)String.format("could not parse envvar: %s to a number: %s, using default: %d", "DFS_MAX_JOIN_COUNT", maxJoinCountStr, maxJoinCount));
            }
        }
        logger.debug((Object)String.format("Max join count allowed: %d, this value can be increased through configuration", maxJoinCount));
        return maxJoinCount;
    }

    /**
     * Tells whether the {@code DFS_RECORD_JOIN_INPUTS} environment variable is set to
     * {@code true}.
     *
     * <p>The value is trimmed and compared case-insensitively against {@code "true"};
     * anything else, including an unset variable, yields {@code false}. Boolean
     * parsing never throws, so no exception handling is needed here.
     *
     * @return {@code true} only when the variable is set to {@code true}
     */
    private static boolean recordJoinInputs() {
        String recordStr = System.getenv("DFS_RECORD_JOIN_INPUTS");
        return recordStr != null && Boolean.parseBoolean(recordStr.trim());
    }

    /**
     * Logs, at info level, a header line followed by up to {@code numRecs} key/value
     * pairs taken from {@code rdd}.
     *
     * @param message header text written before the sample
     * @param rdd     pair RDD to sample with {@code take}
     * @param numRecs maximum number of records to take and log
     */
    private static void printSample(String message, JavaPairRDD<SearchResult, SearchResult> rdd, int numRecs) {
        logger.info((Object)String.format("%s --> ", message));
        for (Tuple2<SearchResult, SearchResult> kv : rdd.take(numRecs)) {
            String keyText = kv._1().toString();
            String valueText = kv._2().toString();
            logger.info((Object)String.format("search result kv >> %s : %s", keyText, valueText));
        }
    }

    /**
     * Resolves the join clauses for a join command.
     *
     * <p>Explicit clauses on the command are returned as they are. When the command
     * has none, a clause is created for every left/right source-field pair that is
     * equal, skipping left fields for which {@code isInternalFieldToSuppress(false, field)}
     * returns {@code true}.
     *
     * @param jcmd the join command
     * @return the explicit clauses, or the clauses inferred from common source fields
     */
    private static JoinNode[] getJoinClauses(JoinCommand jcmd) {
        if (jcmd.getJoinClauses().length > 0) {
            return jcmd.getJoinClauses();
        }
        SearchResult.FieldMeta[] leftFields = SparkDistributedDataset.getSourceFields(jcmd.getSources(), jcmd.getLhsAlias(), true);
        SearchResult.FieldMeta[] rightFields = SparkDistributedDataset.getSourceFields(jcmd.getSources(), jcmd.getRhsAlias(), false);
        ArrayList<JoinNode> inferred = new ArrayList<JoinNode>();
        for (int l = 0; l < leftFields.length; l++) {
            SearchResult.FieldMeta left = leftFields[l];
            for (int r = 0; r < rightFields.length; r++) {
                boolean joinable = !SparkDistributedDataset.isInternalFieldToSuppress(false, left) && left.equals(rightFields[r]);
                if (joinable) {
                    inferred.add(new JoinNode(left.fieldName()));
                }
            }
        }
        return inferred.toArray(new JoinNode[0]);
    }

    /**
     * Tells whether a command node is an {@link RdInCommand} whose remote sid starts
     * with {@code "fsh"}.
     *
     * @param node the command node to test; may be {@code null}
     * @return {@code true} only for an {@code RdInCommand} with a non-null remote sid
     *         beginning with {@code "fsh"}
     */
    private static boolean isRegularFshSource(CommandNode node) {
        // instanceof is false for null, so no separate null check is needed.
        if (node instanceof RdInCommand) {
            String remoteSid = ((RdInCommand)node).getRemoteSid();
            return remoteSid != null && remoteSid.startsWith("fsh");
        }
        return false;
    }

    /**
     * Reads the {@code DFS_AUTO_DETECT_JOIN_STATS_CALCULATION} environment variable.
     *
     * <p>Returns {@code true} when the variable is unset or blank. Otherwise the
     * trimmed value is parsed as a boolean: {@code "true"} (case-insensitive) yields
     * {@code true}, anything else yields {@code false}. A value that is neither
     * {@code "true"} nor {@code "false"} is logged as a warning, since boolean parsing
     * never throws and would otherwise turn a typo into {@code false} silently.
     *
     * @return whether the need for join-stats calculation should be auto-detected
     */
    private static boolean autoDecideJoinStatsCalculations() {
        boolean autoDetect = true;
        boolean setViaEnvvar = false;
        String autoDetectStr = System.getenv("DFS_AUTO_DETECT_JOIN_STATS_CALCULATION");
        if (autoDetectStr != null && !autoDetectStr.trim().isEmpty()) {
            String normalized = autoDetectStr.trim();
            autoDetect = Boolean.parseBoolean(normalized);
            setViaEnvvar = true;
            if (!autoDetect && !"false".equalsIgnoreCase(normalized)) {
                logger.warn((Object)String.format("could not parse auto detect join calculation envvar to boolean: %s", autoDetectStr));
            }
        }
        logger.info((Object)String.format("auto detect join calculation set to: %b via envvar: %b", autoDetect, setViaEnvvar));
        return autoDetect;
    }

    /**
     * Renders the paired left/right join fields as a single log-friendly string.
     *
     * <p>The output has the form
     * {@code join clause fields: { index = 0: left: a, right: b, index = 1: ...}}.
     * The number of entries is taken from {@code leftActualFields}.
     *
     * @param leftActualFields  left-side join fields
     * @param rightActualFields right-side join fields, read at the same indexes
     * @return the formatted description
     */
    private static String toStringJoinClauseFields(SearchResult.FieldMeta[] leftActualFields, SearchResult.FieldMeta[] rightActualFields) {
        StringBuilder out = new StringBuilder("join clause fields: { ");
        for (int idx = 0; idx < leftActualFields.length; idx++) {
            if (idx > 0) {
                out.append(", ");
            }
            String leftName = leftActualFields[idx].fieldName();
            String rightName = rightActualFields[idx].fieldName();
            out.append(String.format("index = %d: left: %s, right: %s", idx, leftName, rightName));
        }
        return out.append("}").toString();
    }

    private static DistributedDataset _join(ComputeEngine engine, ComputeEngineContext ctx, JavaSparkContext jsc, DistributedDataset lhsdd, DistributedDataset rhsdd, JoinCommand jcmd, long n, boolean lhsRepartition, boolean rhsRepartition) {
        JavaRDD<SearchResult> lhsSriRdd;
        JavaRDD<SearchResult> ret;
        JavaPairRDD joinedPairRdd;
        JavaRDD<SearchResult> lhsrdd = ((SparkDistributedDataset)lhsdd).getSrRdd();
        JavaRDD<SearchResult> rhsrdd = ((SparkDistributedDataset)rhsdd).getSrRdd();
        ExecutionHints hints = (ExecutionHints)ctx.get("execution.hints");
        JoinNode[] clauses = SparkDistributedDataset.getJoinClauses(jcmd);
        final SearchResult.FieldMeta[] leftActualFieldNames = new SearchResult.FieldMeta[clauses.length];
        final SearchResult.FieldMeta[] leftSymbolicFieldNames = new SearchResult.FieldMeta[clauses.length];
        final SearchResult.FieldMeta[] rightActualFieldNames = new SearchResult.FieldMeta[clauses.length];
        final SearchResult.FieldMeta[] rightSymbolicFieldNames = new SearchResult.FieldMeta[clauses.length];
        final boolean isJoinToRegularFsh = SparkDistributedDataset.isRegularFshSource(jcmd.getLhs()) || SparkDistributedDataset.isRegularFshSource(jcmd.getRhs());
        for (int i = 0; i < clauses.length; ++i) {
            leftActualFieldNames[i] = SearchResult.FieldMeta.newFieldMeta(clauses[i].getLeftSide().getFieldName());
            leftSymbolicFieldNames[i] = SearchResult.FieldMeta.newFieldMeta("f" + i);
            rightActualFieldNames[i] = SearchResult.FieldMeta.newFieldMeta(clauses[i].getRightSide().getFieldName());
            rightSymbolicFieldNames[i] = SearchResult.FieldMeta.newFieldMeta("f" + i);
        }
        JavaPairRDD<SearchResult, SearchResult> lhsPairRdd = lhsrdd.mapToPair((PairFunction)new PairFunction<SearchResult, SearchResult, SearchResult>(){
            private static final long serialVersionUID = 1L;

            public Tuple2<SearchResult, SearchResult> call(SearchResult sr) throws Exception {
                SearchResult key = SparkDistributedDataset.createKey(true, isJoinToRegularFsh, sr, leftActualFieldNames, leftSymbolicFieldNames, rightActualFieldNames, rightSymbolicFieldNames);
                return new Tuple2((Object)key, (Object)sr);
            }
        });
        JavaPairRDD<SearchResult, SearchResult> rhsPairRdd = rhsrdd.mapToPair((PairFunction)new PairFunction<SearchResult, SearchResult, SearchResult>(){
            private static final long serialVersionUID = 1L;

            public Tuple2<SearchResult, SearchResult> call(SearchResult sr) throws Exception {
                SearchResult key = SparkDistributedDataset.createKey(false, isJoinToRegularFsh, sr, leftActualFieldNames, leftSymbolicFieldNames, rightActualFieldNames, rightSymbolicFieldNames);
                return new Tuple2((Object)key, (Object)sr);
            }
        });
        int numfields = 2;
        if (ctx.hasContext("numfields")) {
            numfields = (Integer)ctx.get("numfields");
        }
        lhsPairRdd = lhsPairRdd.persist(StorageLevel.DISK_ONLY());
        rhsPairRdd = rhsPairRdd.persist(StorageLevel.DISK_ONLY());
        if (SparkDistributedDataset.recordJoinInputs()) {
            logger.info((Object)String.format("input pair rdd counts to the join: lhs: %d, rhs: %d", lhsPairRdd.count(), rhsPairRdd.count()));
            SparkDistributedDataset.printSample("lhs sample data", lhsPairRdd, 1000);
            SparkDistributedDataset.printSample("rhs sample data", rhsPairRdd, 1000);
        }
        HashSet<SearchResult.FieldMeta> lhsReduceByFields = lhsdd.getReducedByFields();
        HashSet<SearchResult.FieldMeta> rhsReduceByFields = rhsdd.getReducedByFields();
        boolean calculateJoinStats = false;
        if (SparkDistributedDataset.autoDecideJoinStatsCalculations()) {
            int len = leftActualFieldNames.length;
            if (len == lhsReduceByFields.size() && len == rhsReduceByFields.size()) {
                for (int i = 0; i < len; ++i) {
                    SearchResult.FieldMeta lhsJoinField = leftActualFieldNames[i];
                    if (!lhsReduceByFields.contains(lhsJoinField)) {
                        calculateJoinStats = true;
                    } else {
                        SearchResult.FieldMeta rhsJoinField = rightActualFieldNames[i];
                        if (rhsReduceByFields.contains(rhsJoinField)) continue;
                        calculateJoinStats = true;
                    }
                    break;
                }
            } else {
                calculateJoinStats = true;
                logger.warn((Object)String.format("strongly recommend to avoid joins on fields which are not identical to left and right reduce by fields: %s, left dd reduce by fields: %s, right dd reduce by fields: %s", SparkDistributedDataset.toStringJoinClauseFields(leftActualFieldNames, rightActualFieldNames), lhsReduceByFields.toString(), rhsReduceByFields.toString()));
            }
        } else {
            calculateJoinStats = true;
        }
        if (calculateJoinStats) {
            logger.info((Object)String.format("will calculate join stats (it is expensive): %s, left dd reduce by fields: %s, right dd reduce by fields: %s", SparkDistributedDataset.toStringJoinClauseFields(leftActualFieldNames, rightActualFieldNames), lhsReduceByFields.toString(), rhsReduceByFields.toString()));
            long start = System.currentTimeMillis();
            Pair<List<Tuple2<SearchResult, Tuple4<Long, Long, Long, Integer>>>, Long> joinStats = SparkDistributedDataset.calculateJoinStats(engine, lhsPairRdd, rhsPairRdd);
            logger.info((Object)String.format("time taken for join stats calculations: %d millis", System.currentTimeMillis() - start));
            if (Utils.isDfsElasticModeEnabled()) {
                engine.unconstrainedParallelism(true);
            }
            long joinCount = joinStats.second();
            int joinPars = hints.defaultPartitionCount(joinCount, numfields);
            HashPartitioner p = new HashPartitioner(joinPars);
            long maxJoinCount = SparkDistributedDataset.getMaxJoinCountAllowed();
            if (maxJoinCount < joinCount) {
                throw new RuntimeException(String.format("max join output count exceeded: allowed: %d, actual join count: %d, you can increase the value through configuration", maxJoinCount, joinCount));
            }
            logger.info((Object)String.format("sdd join: partitioned lhs and rhs: to %d partitions since estimated join output count: %d", joinPars, joinCount));
            List<Tuple2<SearchResult, Tuple4<Long, Long, Long, Integer>>> filteredSeeds = joinStats.first();
            if (!filteredSeeds.isEmpty()) {
                logger.info((Object)String.format("sdd join: seed requirement detected: %d seeds", filteredSeeds.size()));
                Iterator<Tuple2<SearchResult, Tuple4<Long, Long, Long, Integer>>> fsli = filteredSeeds.iterator();
                HashMap<SearchResult, Tuple4<Long, Long, Long, Integer>> seeds = new HashMap<SearchResult, Tuple4<Long, Long, Long, Integer>>();
                while (fsli.hasNext()) {
                    Tuple2<SearchResult, Tuple4<Long, Long, Long, Integer>> fsle = fsli.next();
                    SearchResult key = (SearchResult)fsle._1();
                    seeds.put(key, (Tuple4<Long, Long, Long, Integer>)fsle._2());
                    long lhsCount1 = (Long)((Tuple4)fsle._2())._1();
                    long rhsCount1 = (Long)((Tuple4)fsle._2())._2();
                    int seed = (Integer)((Tuple4)fsle._2())._4();
                    logger.info((Object)String.format("sdd join: high cartesian key: %s, lhs count: %d, rhs count: %d, calculated seed: %d", key.getDataMap(), lhsCount1, rhsCount1, seed));
                }
                lhsPairRdd = SparkDistributedDataset.applySeed(engine, jsc, lhsPairRdd, seeds, (Partitioner)p, true);
                rhsPairRdd = SparkDistributedDataset.applySeed(engine, jsc, rhsPairRdd, seeds, (Partitioner)p, false);
            } else {
                int lhsPars = lhsPairRdd.getNumPartitions();
                if (joinPars > lhsPars) {
                    lhsPairRdd = lhsPairRdd.partitionBy((Partitioner)p);
                    logger.info((Object)String.format("join partition count: %d is higher than lhs partitions: %d hence dd is hash partitioned", joinPars, lhsPars));
                } else {
                    logger.info((Object)String.format("join partition count: %d is lower than equal lhs partitions: %d hence did not re-hash partition", joinPars, lhsPars));
                }
                int rhsPars = rhsPairRdd.getNumPartitions();
                if (joinPars > rhsPars) {
                    rhsPairRdd = rhsPairRdd.partitionBy((Partitioner)p);
                    logger.info((Object)String.format("join partition count: %d is higher than rhs partitions: %d hence dd is hash partitioned", joinPars, rhsPars));
                } else {
                    logger.info((Object)String.format("join partition count: %d is lower than equal rhs partitions: %d hence did not re-hash partition", joinPars, rhsPars));
                }
            }
        } else {
            logger.info((Object)String.format("will avoid join stats calculation: %s, left dd reduce by fields: %s, right dd reduce by fields: %s", SparkDistributedDataset.toStringJoinClauseFields(leftActualFieldNames, rightActualFieldNames), lhsReduceByFields.toString(), rhsReduceByFields.toString()));
        }
        JoinType type = jcmd.getJoinType();
        SearchResult.FieldMeta[] leftSourceFields = SparkDistributedDataset.getSourceFields(jcmd.getSources(), jcmd.getLhsAlias(), true);
        SearchResult.FieldMeta[] rightsourceFields = SparkDistributedDataset.getSourceFields(jcmd.getSources(), jcmd.getRhsAlias(), false);
        boolean retainMVFields = false;
        if (ctx.hasContext("retain.multi.value.fields")) {
            retainMVFields = (Boolean)ctx.get("retain.multi.value.fields");
        }
        logger.debug((Object)String.format("retain mv fields set to: %b", retainMVFields));
        if (type.equals((Object)JoinType.INNER)) {
            joinedPairRdd = lhsPairRdd.join((JavaPairRDD)rhsPairRdd);
            logger.info((Object)String.format("sdd join: inner join output dataset contains %d partitions", joinedPairRdd.getNumPartitions()));
            ret = SparkDistributedDataset._merge1((JavaPairRDD<SearchResult, Tuple2<SearchResult, SearchResult>>)joinedPairRdd, jcmd, retainMVFields);
        } else if (type.equals((Object)JoinType.FULL) || type.equals((Object)JoinType.OUTER)) {
            joinedPairRdd = lhsPairRdd.fullOuterJoin((JavaPairRDD)rhsPairRdd);
            logger.info((Object)String.format("sdd join: full outer join output dataset contains %d partitions", joinedPairRdd.getNumPartitions()));
            ret = SparkDistributedDataset._merge2((JavaPairRDD<SearchResult, Tuple2<Optional<SearchResult>, Optional<SearchResult>>>)joinedPairRdd, jcmd, leftSourceFields, rightsourceFields, retainMVFields);
        } else if (type.equals((Object)JoinType.LEFT)) {
            joinedPairRdd = lhsPairRdd.leftOuterJoin((JavaPairRDD)rhsPairRdd);
            logger.info((Object)String.format("sdd join: left outer join output dataset contains %d partitions", joinedPairRdd.getNumPartitions()));
            ret = SparkDistributedDataset._merge3((JavaPairRDD<SearchResult, Tuple2<SearchResult, Optional<SearchResult>>>)joinedPairRdd, jcmd, rightsourceFields, retainMVFields);
        } else if (type.equals((Object)JoinType.RIGHT)) {
            joinedPairRdd = lhsPairRdd.rightOuterJoin((JavaPairRDD)rhsPairRdd);
            logger.info((Object)String.format("sdd join: right outer join output dataset contains %d partitions", joinedPairRdd.getNumPartitions()));
            ret = SparkDistributedDataset._merge4((JavaPairRDD<SearchResult, Tuple2<Optional<SearchResult>, SearchResult>>)joinedPairRdd, jcmd, leftSourceFields, retainMVFields);
        } else {
            throw new RuntimeException(String.format("sdd join: join type %s not supported", type));
        }
        JavaRDD<SearchResult> sriRdd = lhsSriRdd = ((SparkDistributedDataset)lhsdd.getsriDD()).getSrRdd();
        SparkDistributedDataset sridd = new SparkDistributedDataset(engine, ctx, jsc, sriRdd, lhsdd.getsriDD().getsriDD(), FieldExtractor.ExtractionHint.NOT_PRECOMPUTED, n, new HashSet<SearchResult.FieldMeta>(), false);
        return new SparkDistributedDataset(engine, ctx, jsc, ret, sridd, FieldExtractor.ExtractionHint.NOT_PRECOMPUTED, n, new HashSet<SearchResult.FieldMeta>(), false);
    }

    /**
     * Collects the declared field metadata for one side of a two-source command.
     *
     * @param sources the command sources; anything other than exactly two entries yields an empty array
     * @param alias   optional alias; when non-blank every field name is prefixed with {@code alias + "."}
     * @param isLeft  {@code true} to read the first source, {@code false} to read the second
     * @return the declared fields of the selected source, excluding any field named {@code "*"}
     */
    private static SearchResult.FieldMeta[] getSourceFields(CommandNode[] sources, String alias, boolean isLeft) {
        SearchResult.FieldMeta[] none = new SearchResult.FieldMeta[]{};
        if (sources == null || sources.length != 2) {
            return none;
        }
        FieldProperties[] declared = sources[isLeft ? 0 : 1].getFieldPropertiesArray();
        ArrayList<SearchResult.FieldMeta> collected = new ArrayList<SearchResult.FieldMeta>();
        if (declared == null) {
            return collected.toArray(none);
        }
        // A blank alias is treated the same as no alias at all.
        boolean qualify = alias != null && alias.trim().length() > 0;
        for (int i = 0; i < declared.length; ++i) {
            FieldProperties property = declared[i];
            if ("*".equals(property.getName())) {
                continue;
            }
            if (qualify) {
                collected.add(SearchResult.FieldMeta.newFieldMeta(alias + "." + property.getName()));
            } else {
                collected.add(SearchResult.FieldMeta.newFieldMeta(property.getName()));
            }
        }
        return collected.toArray(none);
    }

    /**
     * Returns a copy of {@code sr} with {@code SEED_VALUE_KEY -> seedVal} appended as the last field.
     *
     * @param sr      the result to extend; it must not already contain {@code SEED_VALUE_KEY}
     * @param seedVal the seed value to attach
     * @return a new result built from the copied data map and the extended name/value arrays
     * @throws RuntimeException if {@code sr} already carries a seed field
     */
    private static SearchResult addSeed(SearchResult sr, int seedVal) {
        if (sr.containsField(SEED_VALUE_KEY)) {
            throw new RuntimeException(String.format("key is already seeded: %s", sr.toString()));
        }
        // Copy the map so the seed is added to the new result's data only.
        SearchResult.SRHashMap<SearchResult.FieldMeta, Object> seededData = new SearchResult.SRHashMap<SearchResult.FieldMeta, Object>((Map<SearchResult.FieldMeta, Object>)sr.getDataMap());
        SearchResult.FieldMeta[] oldFields = sr.getFieldNames();
        Object[] oldVals = sr.getFieldValues();
        int fieldCount = oldFields.length;
        int valCount = oldVals.length;

        SearchResult.FieldMeta[] seededFields = new SearchResult.FieldMeta[fieldCount + 1];
        System.arraycopy(oldFields, 0, seededFields, 0, fieldCount);
        seededFields[fieldCount] = SEED_VALUE_KEY;

        Object[] seededVals = new Object[valCount + 1];
        System.arraycopy(oldVals, 0, seededVals, 0, valCount);
        seededVals[valCount] = seedVal;

        seededData.put(SEED_VALUE_KEY, seedVal);
        return SearchResultFactory.getInstance().createSearchResult(seededData, seededFields, seededVals);
    }

    /**
     * Turns a dataset alias into the prefix placed in front of its field names.
     *
     * @param alias the alias; may be {@code null} or empty when no alias was given
     * @return {@code ""} when {@code alias} is {@code null} or empty, otherwise {@code alias + "."}
     */
    public static String getAliasPrefix(String alias) {
        // A null alias means "no alias" (getSourceFields treats it that way too), so do not NPE here.
        if (alias == null || alias.isEmpty()) {
            return "";
        }
        return alias + ".";
    }

    /**
     * Flattens the output of an inner join: both sides are always present, so each pair is merged
     * with empty placeholder-field arrays.
     *
     * @param joinedPairRdd  join output keyed by join key, carrying the (left, right) row pair
     * @param jcmd           the join command supplying the left/right aliases
     * @param retainMVFields forwarded unchanged to {@code merge}
     * @return one merged row per joined pair
     */
    private static JavaRDD<SearchResult> _merge1(JavaPairRDD<SearchResult, Tuple2<SearchResult, SearchResult>> joinedPairRdd, JoinCommand jcmd, final boolean retainMVFields) {
        final String leftPrefix = SparkDistributedDataset.getAliasPrefix(jcmd.getLhsAlias());
        final String rightPrefix = SparkDistributedDataset.getAliasPrefix(jcmd.getRhsAlias());
        return joinedPairRdd.map(new Function<Tuple2<SearchResult, Tuple2<SearchResult, SearchResult>>, SearchResult>(){
            private static final long serialVersionUID = 1L;

            @Override
            public SearchResult call(Tuple2<SearchResult, Tuple2<SearchResult, SearchResult>> row) throws Exception {
                SearchResult joinKey = row._1();
                Tuple2<SearchResult, SearchResult> sides = row._2();
                SearchResult left = sides._1();
                SearchResult right = sides._2();
                return SparkDistributedDataset.merge(joinKey, leftPrefix, left, rightPrefix, right, new SearchResult.FieldMeta[0], new SearchResult.FieldMeta[0], retainMVFields);
            }
        });
    }

    /**
     * Flattens the output of a full outer join. Either side may be absent; an absent side is
     * replaced by the factory's empty result so that {@code merge} can fill in placeholders
     * from {@code leftFields} / {@code rightFields}.
     *
     * @param joinedPairRdd  join output keyed by join key, carrying optional left and right rows
     * @param jcmd           the join command supplying the left/right aliases
     * @param leftFields     fields to emit as placeholders when the left row is empty
     * @param rightFields    fields to emit as placeholders when the right row is empty
     * @param retainMVFields forwarded unchanged to {@code merge}
     * @return one merged row per joined pair
     */
    private static JavaRDD<SearchResult> _merge2(JavaPairRDD<SearchResult, Tuple2<Optional<SearchResult>, Optional<SearchResult>>> joinedPairRdd, JoinCommand jcmd, final SearchResult.FieldMeta[] leftFields, final SearchResult.FieldMeta[] rightFields, final boolean retainMVFields) {
        final String leftPrefix = SparkDistributedDataset.getAliasPrefix(jcmd.getLhsAlias());
        final String rightPrefix = SparkDistributedDataset.getAliasPrefix(jcmd.getRhsAlias());
        return joinedPairRdd.map(new Function<Tuple2<SearchResult, Tuple2<Optional<SearchResult>, Optional<SearchResult>>>, SearchResult>(){
            private static final long serialVersionUID = 1L;

            @Override
            public SearchResult call(Tuple2<SearchResult, Tuple2<Optional<SearchResult>, Optional<SearchResult>>> row) throws Exception {
                SearchResult joinKey = row._1();
                Optional<SearchResult> maybeLeft = row._2()._1();
                Optional<SearchResult> maybeRight = row._2()._2();
                SearchResult left = SearchResultFactory.getInstance().emptyResult();
                if (maybeLeft.isPresent()) {
                    left = maybeLeft.get();
                }
                SearchResult right = SearchResultFactory.getInstance().emptyResult();
                if (maybeRight.isPresent()) {
                    right = maybeRight.get();
                }
                return SparkDistributedDataset.merge(joinKey, leftPrefix, left, rightPrefix, right, leftFields, rightFields, retainMVFields);
            }
        });
    }

    /**
     * Flattens the output of a left outer join. The left row is always present; an absent right
     * row is replaced by the factory's empty result so that {@code merge} can fill in placeholders
     * from {@code rightFields}.
     *
     * @param joinedPairRdd  join output keyed by join key, carrying the left row and optional right row
     * @param jcmd           the join command supplying the left/right aliases
     * @param rightFields    fields to emit as placeholders when the right row is empty
     * @param retainMVFields forwarded unchanged to {@code merge}
     * @return one merged row per joined pair
     */
    private static JavaRDD<SearchResult> _merge3(JavaPairRDD<SearchResult, Tuple2<SearchResult, Optional<SearchResult>>> joinedPairRdd, JoinCommand jcmd, final SearchResult.FieldMeta[] rightFields, final boolean retainMVFields) {
        final String leftPrefix = SparkDistributedDataset.getAliasPrefix(jcmd.getLhsAlias());
        final String rightPrefix = SparkDistributedDataset.getAliasPrefix(jcmd.getRhsAlias());
        return joinedPairRdd.map(new Function<Tuple2<SearchResult, Tuple2<SearchResult, Optional<SearchResult>>>, SearchResult>(){
            private static final long serialVersionUID = 1L;

            @Override
            public SearchResult call(Tuple2<SearchResult, Tuple2<SearchResult, Optional<SearchResult>>> row) throws Exception {
                SearchResult joinKey = row._1();
                SearchResult left = row._2()._1();
                Optional<SearchResult> maybeRight = row._2()._2();
                SearchResult right = SearchResultFactory.getInstance().emptyResult();
                if (maybeRight.isPresent()) {
                    right = maybeRight.get();
                }
                return SparkDistributedDataset.merge(joinKey, leftPrefix, left, rightPrefix, right, new SearchResult.FieldMeta[0], rightFields, retainMVFields);
            }
        });
    }

    /**
     * Flattens the output of a right outer join. The right row is always present; an absent left
     * row is replaced by the factory's empty result so that {@code merge} can fill in placeholders
     * from {@code leftFields}.
     *
     * @param joinedPairRdd  join output keyed by join key, carrying the optional left row and the right row
     * @param jcmd           the join command supplying the left/right aliases
     * @param leftFields     fields to emit as placeholders when the left row is empty
     * @param retainMVFields forwarded unchanged to {@code merge}
     * @return one merged row per joined pair
     */
    private static JavaRDD<SearchResult> _merge4(JavaPairRDD<SearchResult, Tuple2<Optional<SearchResult>, SearchResult>> joinedPairRdd, JoinCommand jcmd, final SearchResult.FieldMeta[] leftFields, final boolean retainMVFields) {
        final String leftPrefix = SparkDistributedDataset.getAliasPrefix(jcmd.getLhsAlias());
        final String rightPrefix = SparkDistributedDataset.getAliasPrefix(jcmd.getRhsAlias());
        return joinedPairRdd.map(new Function<Tuple2<SearchResult, Tuple2<Optional<SearchResult>, SearchResult>>, SearchResult>(){
            private static final long serialVersionUID = 1L;

            @Override
            public SearchResult call(Tuple2<SearchResult, Tuple2<Optional<SearchResult>, SearchResult>> row) throws Exception {
                SearchResult joinKey = row._1();
                Optional<SearchResult> maybeLeft = row._2()._1();
                SearchResult left = SearchResultFactory.getInstance().emptyResult();
                if (maybeLeft.isPresent()) {
                    left = maybeLeft.get();
                }
                SearchResult right = row._2()._2();
                return SparkDistributedDataset.merge(joinKey, leftPrefix, left, rightPrefix, right, leftFields, new SearchResult.FieldMeta[0], retainMVFields);
            }
        });
    }

    /**
     * Merges the left and right rows of one joined pair into a single result row.
     *
     * <p>For a non-empty side, every field is copied; fields for which
     * {@code isInternalFieldToSuppress} returns {@code true} keep their original name, all others
     * are renamed to {@code aliasPrefix + fieldName}. A right-side field that keeps its original
     * name is skipped when the left side already supplied it. For an empty side, the corresponding
     * {@code leftFields} / {@code rightFields} entries are emitted with a {@code NullField}
     * placeholder value instead (again skipping fields flagged by
     * {@code isInternalFieldToSuppress}, and on the right also those already present).
     *
     * <p>The name/value arrays are positional: left entries occupy the leading slots and right
     * entries follow them. A skipped entry leaves its slot {@code null}; slots are not compacted.
     *
     * @param key            the join key (not used by the merge itself)
     * @param lhsAliasPrefix prefix applied to renamed left-side fields
     * @param lhs            left row; may be empty
     * @param rhsAliasPrefix prefix applied to renamed right-side fields
     * @param rhs            right row; may be empty
     * @param leftFields     placeholder fields used when the left row has no fields
     * @param rightFields    placeholder fields used when the right row has no fields
     * @param retainMVFields when {@code false}, both rows are first passed through {@code filterMVs()}
     * @return the merged row
     */
    private static SearchResult merge(SearchResult key, String lhsAliasPrefix, SearchResult lhs, String rhsAliasPrefix, SearchResult rhs, SearchResult.FieldMeta[] leftFields, SearchResult.FieldMeta[] rightFields, boolean retainMVFields) {
        SearchResult.FieldMeta newFieldName;
        Object fieldValue;
        SearchResult.FieldMeta fieldName;
        int i;
        if (!retainMVFields) {
            // Guarded to match the null checks below; previously a null side failed here with an NPE.
            if (lhs != null) {
                lhs = lhs.filterMVs();
            }
            if (rhs != null) {
                rhs = rhs.filterMVs();
            }
        }
        SearchResult.FieldMeta[] lhsFieldNames = null;
        Object[] lhsFieldValues = null;
        int lhsLen = 0;
        if (lhs != null) {
            lhsFieldNames = lhs.getFieldNames();
            lhsFieldValues = lhs.getFieldValues();
            lhsLen = lhsFieldValues.length;
        }
        SearchResult.FieldMeta[] rhsFieldNames = null;
        Object[] rhsFieldValues = null;
        int rhsLen = 0;
        if (rhs != null) {
            rhsFieldNames = rhs.getFieldNames();
            rhsFieldValues = rhs.getFieldValues();
            rhsLen = rhsFieldNames.length;
        }
        // Slots reserved per side: real fields when present, otherwise the placeholder fields.
        int lhsSlots = lhsLen == 0 ? leftFields.length : lhsLen;
        int rhsSlots = rhsLen == 0 ? rightFields.length : rhsLen;
        int totalLen = lhsSlots + rhsSlots;
        SearchResult.FieldMeta[] fields = new SearchResult.FieldMeta[totalLen];
        Object[] vals = new Object[totalLen];
        SearchResult.SRHashMap<SearchResult.FieldMeta, Object> ret = new SearchResult.SRHashMap<SearchResult.FieldMeta, Object>(totalLen * 2);
        // Index of the first right-side slot. It must skip every slot the left side wrote to,
        // including placeholder slots; using lhsLen (0 for an empty left row) made the right
        // side overwrite the left placeholders.
        int rhsOffset = 0;
        if (lhs != null) {
            rhsOffset = lhsSlots;
            if (lhsLen == 0) {
                for (i = 0; i < leftFields.length; ++i) {
                    fieldName = leftFields[i];
                    if (SparkDistributedDataset.isInternalFieldToSuppress(retainMVFields, fieldName)) continue;
                    fieldValue = new SearchResultFactory.NullField(true);
                    ret.put(fieldName, fieldValue);
                    fields[i] = fieldName;
                    vals[i] = fieldValue;
                }
            } else {
                for (i = 0; i < lhsLen; ++i) {
                    fieldName = lhsFieldNames[i];
                    fieldValue = lhsFieldValues[i];
                    // Flagged fields keep their name; everything else is qualified with the alias.
                    newFieldName = SparkDistributedDataset.isInternalFieldToSuppress(retainMVFields, fieldName) ? fieldName : SearchResult.FieldMeta.newFieldMeta(lhsAliasPrefix + fieldName);
                    ret.put(newFieldName, fieldValue);
                    fields[i] = newFieldName;
                    vals[i] = fieldValue;
                }
            }
        }
        if (rhs != null) {
            if (rhsLen == 0) {
                for (i = 0; i < rightFields.length; ++i) {
                    fieldName = rightFields[i];
                    if (SparkDistributedDataset.isInternalFieldToSuppress(retainMVFields, fieldName) || ret.containsKey(fieldName)) continue;
                    fieldValue = new SearchResultFactory.NullField(true);
                    ret.put(fieldName, fieldValue);
                    fields[rhsOffset + i] = fieldName;
                    vals[rhsOffset + i] = fieldValue;
                }
            } else {
                for (i = 0; i < rhsLen; ++i) {
                    fieldName = rhsFieldNames[i];
                    fieldValue = rhsFieldValues[i];
                    if (SparkDistributedDataset.isInternalFieldToSuppress(retainMVFields, fieldName)) {
                        newFieldName = fieldName;
                        // The left side wins for un-prefixed fields present on both sides.
                        if (ret.containsKey(newFieldName)) {
                            continue;
                        }
                    } else {
                        newFieldName = SearchResult.FieldMeta.newFieldMeta(rhsAliasPrefix + fieldName);
                    }
                    ret.put(newFieldName, fieldValue);
                    fields[rhsOffset + i] = newFieldName;
                    vals[rhsOffset + i] = fieldValue;
                }
            }
        }
        return SearchResultFactory.getInstance().createSearchResult(ret, fields, vals);
    }

    /**
     * Decides whether a field is treated as "internal" by the join merge.
     *
     * <p>Returns {@code true} when either:
     * <ul>
     *   <li>the field starts with {@code '_'}, unless {@code retainMVFields} is set and the field
     *       name has at least four characters and starts with {@code "__mv_"}; or</li>
     *   <li>the field name starts with one of the reserved prestats prefixes checked below.</li>
     * </ul>
     *
     * @param retainMVFields whether {@code "__mv_"} fields are exempt from the underscore rule
     * @param field          the field to classify; {@code null} yields {@code false}
     * @return {@code true} if the field matches one of the rules above
     */
    private static boolean isInternalFieldToSuppress(boolean retainMVFields, SearchResult.FieldMeta field) {
        if (field == null) {
            return false;
        }
        boolean startsWithUnderscore = field.length() != 0 && field.charAt(0) == '_';
        if (startsWithUnderscore) {
            if (!retainMVFields) {
                return true;
            }
            if (field.fieldName() == null || field.length() < 4 || !field.fieldName().startsWith("__mv_")) {
                return true;
            }
        }
        if (field.fieldName() == null) {
            return false;
        }
        // NOTE(review): the trailing '*' is matched literally by startsWith; it looks like it was
        // meant as a wildcard - confirm before relying on these prefixes.
        return field.fieldName().startsWith("prestats_reserved_*") || field.fieldName().startsWith("psrsvd_*");
    }

    /**
     * @return the number of partitions of the underlying RDD
     */
    @Override
    public int partitions() {
        final int partitionCount = srRdd.getNumPartitions();
        return partitionCount;
    }

    /**
     * @return the field extraction hint this dataset was created with
     */
    @Override
    public FieldExtractor.ExtractionHint fieldExtractionHint() {
        final FieldExtractor.ExtractionHint hint = this.fieldExtractionHint;
        return hint;
    }

    /**
     * Applies {@code reporter} to this dataset by delegating to {@code _report} with this
     * dataset's own engine, context, RDD and metadata.
     *
     * @param reporter the reporter to apply
     * @return the dataset produced by {@code _report}
     */
    @Override
    public DistributedDataset transform(Reporter reporter) {
        return _report(engine, ctx, jsc, srRdd, sridd, reporter, fieldExtractionHint, n, reducedByFields, ordered);
    }

    /**
     * Feeds every row of {@code srRdd} to {@code reporter}, partition by partition, and wraps the
     * mapped RDD in a new dataset that carries over the given metadata.
     *
     * <p>NOTE(review): each partition's iterator is drained before it is returned, so the
     * resulting RDD appears to contain no rows - confirm that callers only rely on the
     * reporting side effect.
     *
     * @param reporter receives {@code (partition index, row)} for every row
     * @return a dataset over the mapped RDD with the same hint, limit, reduce-by fields and ordering flag
     */
    private static DistributedDataset _report(ComputeEngine engine, ComputeEngineContext ctx, JavaSparkContext jsc, JavaRDD<SearchResult> srRdd, DistributedDataset sridd, final Reporter reporter, FieldExtractor.ExtractionHint fieldExtractionHint, long n, HashSet<SearchResult.FieldMeta> reducedByFields, boolean ordered) {
        Function2<Integer, Iterator<SearchResult>, Iterator<SearchResult>> reportEach = new Function2<Integer, Iterator<SearchResult>, Iterator<SearchResult>>(){
            private static final long serialVersionUID = 1L;

            @Override
            public Iterator<SearchResult> call(Integer parId, Iterator<SearchResult> rows) throws Exception {
                for (; rows.hasNext(); ) {
                    reporter.report(parId, rows.next());
                }
                return rows;
            }
        };
        JavaRDD<SearchResult> reported = srRdd.mapPartitionsWithIndex(reportEach, true);
        return new SparkDistributedDataset(engine, ctx, jsc, reported, sridd, fieldExtractionHint, n, reducedByFields, ordered);
    }

    /**
     * Applies {@code filter} to this dataset by delegating to {@code _filter} with this
     * dataset's own engine, context, RDD and metadata.
     *
     * @param filter the row predicate
     * @return the dataset produced by {@code _filter}
     */
    @Override
    public DistributedDataset transform(Filter filter) {
        return _filter(engine, ctx, jsc, srRdd, sridd, fieldExtractionHint, filter, n, reducedByFields, ordered);
    }

    /**
     * Keeps only the rows of {@code srRdd} accepted by {@code filter} and wraps the result in a
     * new dataset that carries over the given metadata.
     *
     * @param filter the row predicate; rows for which {@code accept} returns true are kept
     * @return a dataset over the filtered RDD with the same hint, limit, reduce-by fields and ordering flag
     */
    private static DistributedDataset _filter(ComputeEngine engine, ComputeEngineContext ctx, JavaSparkContext jsc, JavaRDD<SearchResult> srRdd, DistributedDataset sridd, FieldExtractor.ExtractionHint fieldExtractionHint, final Filter filter, long n, HashSet<SearchResult.FieldMeta> reducedByFields, boolean ordered) {
        Function<SearchResult, Boolean> keepAccepted = new Function<SearchResult, Boolean>(){
            private static final long serialVersionUID = 1L;

            @Override
            public Boolean call(SearchResult row) throws Exception {
                return filter.accept(row);
            }
        };
        JavaRDD<SearchResult> accepted = srRdd.filter(keepAccepted);
        return new SparkDistributedDataset(engine, ctx, jsc, accepted, sridd, fieldExtractionHint, n, reducedByFields, ordered);
    }

    /**
     * Builds a new dataset from an in-memory list of rows, keeping this dataset's metadata.
     *
     * @param srs     the rows to parallelize
     * @param numPars the number of partitions to spread them over
     * @return a dataset over the parallelized rows, marked as not ordered
     */
    @Override
    public DistributedDataset resetSearchResults(List<SearchResult> srs, int numPars) {
        JavaRDD<SearchResult> replacement = jsc.parallelize(srs, numPars);
        return new SparkDistributedDataset(engine, ctx, jsc, replacement, sridd, fieldExtractionHint, n, reducedByFields, false);
    }

    /**
     * @return the reduce-by field set held by this dataset (the instance itself, not a copy)
     */
    @Override
    public HashSet<SearchResult.FieldMeta> getReducedByFields() {
        final HashSet<SearchResult.FieldMeta> current = reducedByFields;
        return current;
    }

    /**
     * Limits this dataset to {@code n} rows by delegating to {@code _limit}.
     *
     * @param n the row limit passed through to {@code _limit}
     * @return the dataset produced by {@code _limit}
     */
    @Override
    public DistributedDataset limit(long n) {
        // 'n' is the method argument here, not the instance field of the same name.
        return _limit(engine, ctx, jsc, this, srRdd, sridd, n, fieldExtractionHint, reducedByFields, ordered);
    }

    /**
     * Tags every row with the dataset it came from by merging in two extra fields:
     * {@code _dataset_index} (set to {@code datasetId}) and {@code _dataset_streaming}
     * (set to {@code Boolean.TRUE} for streaming datasets, {@code null} otherwise).
     *
     * <p>Rows are tagged lazily as the partition iterator is consumed, and the call asks Spark
     * to preserve partitioning.
     *
     * @param rdd       the rows to tag
     * @param datasetId the value stored in {@code _dataset_index}
     * @param streaming whether the rows come from a streaming dataset
     * @return an RDD of the tagged rows
     */
    public static JavaRDD<SearchResult> _mapPartitionsWithDatasetId(JavaRDD<SearchResult> rdd, final int datasetId, final boolean streaming) {
        final SearchResult.FieldMeta[] datasetFields = new SearchResult.FieldMeta[]{SearchResult.FieldMeta.newFieldMeta("_dataset_index"), SearchResult.FieldMeta.newFieldMeta("_dataset_streaming")};
        return rdd.mapPartitions(new FlatMapFunction<Iterator<SearchResult>, SearchResult>(){
            private static final long serialVersionUID = 1L;

            @Override
            public Iterator<SearchResult> call(final Iterator<SearchResult> srIterator) throws Exception {
                return new Iterator<SearchResult>(){

                    @Override
                    public boolean hasNext() {
                        return srIterator.hasNext();
                    }

                    @Override
                    public SearchResult next() {
                        // Honour the Iterator contract: signal exhaustion instead of handing out null rows.
                        if (!srIterator.hasNext()) {
                            throw new java.util.NoSuchElementException();
                        }
                        SearchResult tag = SearchResultFactory.getInstance().createSearchResult(datasetFields, new Object[]{datasetId, streaming ? Boolean.TRUE : null});
                        return SearchResultFactory.getInstance().merge(srIterator.next(), tag);
                    }

                    @Override
                    public void remove() {
                        // Removal is not supported by this read-only view; say so rather than silently ignore it.
                        throw new UnsupportedOperationException("remove");
                    }
                };
            }
        }, true);
    }

    /**
     * Prepares one input of a union: optionally sorts it by the union's sort-by fields and then
     * tags every row with its dataset id.
     *
     * <p>When the command asks for repartitioning, the work is handed to
     * {@code _repartition_sort_union_dataset}. Otherwise a global sort by key is performed, but
     * only for streaming datasets with at least one sort-by field.
     *
     * @param rdd              the rows of this union input
     * @param datasetId        id stored on every row by {@code _mapPartitionsWithDatasetId}
     * @param streamingDataset whether this input is a streaming dataset
     * @param partitions       partition count forwarded to the repartitioning variant
     * @param ucmd             the union command supplying sort-by fields and the repartition flag
     * @return the (possibly sorted) rows, tagged with the dataset id
     */
    private static JavaRDD<SearchResult> _sort_union_dataset(JavaRDD<SearchResult> rdd, int datasetId, boolean streamingDataset, int partitions, UnionCommand ucmd) {
        final IOrdering[] sortByFields = ucmd.getSortByFields();
        final SearchResult.FieldMeta[] fieldsMeta = new SearchResult.FieldMeta[sortByFields.length];
        int slot = 0;
        for (IOrdering ordering : sortByFields) {
            fieldsMeta[slot++] = SearchResult.FieldMeta.newFieldMeta(ordering.getField().getByName());
        }
        if (ucmd.repartition()) {
            return SparkDistributedDataset._repartition_sort_union_dataset(rdd, datasetId, streamingDataset, ucmd.getSortByFields(), fieldsMeta, partitions);
        }
        if (!streamingDataset || sortByFields.length == 0) {
            // Nothing to sort: just tag the rows.
            return SparkDistributedDataset._mapPartitionsWithDatasetId(rdd, datasetId, streamingDataset);
        }
        // Key every row by a small result holding only its sort-by field values.
        JavaPairRDD<SearchResult, SearchResult> keyed = rdd.mapToPair(new PairFunction<SearchResult, SearchResult, SearchResult>(){
            private static final long serialVersionUID = 1L;

            @Override
            public Tuple2<SearchResult, SearchResult> call(SearchResult row) throws Exception {
                int width = sortByFields.length;
                SearchResult.FieldMeta[] keyNames = new SearchResult.FieldMeta[width];
                Object[] keyValues = new Object[width];
                SearchResult.SRHashMap<SearchResult.FieldMeta, Object> keyData = new SearchResult.SRHashMap<SearchResult.FieldMeta, Object>(width * 2);
                for (int col = 0; col < width; ++col) {
                    SearchResult.FieldMeta name = fieldsMeta[col];
                    Object value = row.getFieldValue(name);
                    keyData.put(name, value);
                    keyNames[col] = name;
                    keyValues[col] = value;
                }
                SearchResult sortKey = SearchResultFactory.getInstance().createSearchResult(keyData, keyNames, keyValues);
                return new Tuple2<SearchResult, SearchResult>(sortKey, row);
            }
        });
        JavaPairRDD<SearchResult, SearchResult> sorted = keyed.sortByKey((Comparator)new SearchResultComparator(sortByFields));
        JavaRDD<SearchResult> sortedRows = sorted.map(new Function<Tuple2<SearchResult, SearchResult>, SearchResult>(){
            private static final long serialVersionUID = 1L;

            @Override
            public SearchResult call(Tuple2<SearchResult, SearchResult> pair) throws Exception {
                return pair._2();
            }
        });
        return SparkDistributedDataset._mapPartitionsWithDatasetId(sortedRows, datasetId, streamingDataset);
    }

    /**
     * Looks up the prime closest to {@code num} via {@code PrimeUtils.getClosestPrime}.
     *
     * @param num the number to approximate
     * @return whatever {@code PrimeUtils.getClosestPrime(num)} returns
     */
    private static int closestPrimeNumber(int num) {
        return new PrimeUtils().getClosestPrime(num);
    }

    /**
     * Prepares one input of a union by hash-repartitioning it on the first sort-by field and
     * sorting within each partition, then tags every row with its dataset id.
     *
     * <p>The repartition/sort step only runs for streaming datasets with at least one sort-by
     * field; otherwise the rows are only tagged. The number of target partitions is
     * {@code closestPrimeNumber(partitions)}.
     *
     * @param rdd              the rows of this union input
     * @param datasetId        id stored on every row by {@code _mapPartitionsWithDatasetId}
     * @param streamingDataset whether this input is a streaming dataset
     * @param sortByFields     ordering used to sort rows within each partition
     * @param fieldsMeta       field metadata matching {@code sortByFields} index by index
     * @param partitions       requested partition count before rounding to a prime
     * @return the (possibly repartitioned and sorted) rows, tagged with the dataset id
     */
    private static JavaRDD<SearchResult> _repartition_sort_union_dataset(JavaRDD<SearchResult> rdd, int datasetId, boolean streamingDataset, final IOrdering[] sortByFields, final SearchResult.FieldMeta[] fieldsMeta, int partitions) {
        if (streamingDataset && sortByFields.length > 0) {
            // Key every row by a small result holding only its sort-by field values.
            JavaPairRDD<SearchResult, SearchResult> prdd = rdd.mapToPair(new PairFunction<SearchResult, SearchResult, SearchResult>(){
                private static final long serialVersionUID = 1L;

                @Override
                public Tuple2<SearchResult, SearchResult> call(SearchResult sr) throws Exception {
                    int len = sortByFields.length;
                    SearchResult.FieldMeta[] fieldNames = new SearchResult.FieldMeta[len];
                    Object[] fieldValues = new Object[len];
                    SearchResult.SRHashMap<SearchResult.FieldMeta, Object> key = new SearchResult.SRHashMap<SearchResult.FieldMeta, Object>(len * 2);
                    for (int i = 0; i < len; ++i) {
                        SearchResult.FieldMeta fieldName = fieldsMeta[i];
                        Object val = sr.getFieldValue(fieldName);
                        key.put(fieldName, val);
                        fieldNames[i] = fieldName;
                        fieldValues[i] = val;
                    }
                    return new Tuple2<SearchResult, SearchResult>(SearchResultFactory.getInstance().createSearchResult(key, fieldNames, fieldValues), sr);
                }
            });
            final int newPars = SparkDistributedDataset.closestPrimeNumber(partitions);
            logger.info((Object)("Repartition and sort within partitions " + newPars));
            final SearchResult.FieldMeta fieldName = fieldsMeta[0];
            prdd = prdd.repartitionAndSortWithinPartitions(new Partitioner(){
                private static final long serialVersionUID = 1L;

                @Override
                public int getPartition(Object key) {
                    Object partitionValue = ((SearchResult)key).getFieldValue(fieldName);
                    if (partitionValue == null) {
                        // Rows without a value for the partition field all go to the first partition.
                        return 0;
                    }
                    // Use the same modulus that numPartitions() reports, so the index is always in
                    // range; this also avoids recomputing the prime for every single key.
                    return Math.abs(partitionValue.hashCode() % newPars);
                }

                @Override
                public int numPartitions() {
                    return newPars;
                }
            }, (Comparator)new SearchResultComparator(sortByFields));
            rdd = prdd.map(new Function<Tuple2<SearchResult, SearchResult>, SearchResult>(){
                private static final long serialVersionUID = 1L;

                @Override
                public SearchResult call(Tuple2<SearchResult, SearchResult> v1) throws Exception {
                    return v1._2();
                }
            });
        }
        return SparkDistributedDataset._mapPartitionsWithDatasetId(rdd, datasetId, streamingDataset);
    }

    /**
     * Post-processes a union result: for cursored unions the dataset is sorted by
     * {@code _dataset_index} (ascending), {@code _dataset_streaming} (ascending) and
     * {@code _time} (descending); otherwise this dataset is returned unchanged.
     *
     * <p>NOTE(review): {@code union} sorts {@code _dataset_streaming} in descending order for the
     * cursored case, whereas this method uses ascending - confirm the difference is intended.
     *
     * @param ucmd the union command
     * @return the sorted dataset for cursored unions, otherwise {@code this}
     */
    @Override
    public DistributedDataset customUnion(UnionCommand ucmd) {
        if (!ucmd.cursored()) {
            return this;
        }
        IOrdering[] cursorOrder = new IOrdering[]{
            new SortNode("_dataset_index", SortOrder.ASC),
            new SortNode("_dataset_streaming", SortOrder.ASC),
            new SortNode("_time", SortOrder.DESC)
        };
        return this.sort(cursorOrder);
    }

    /**
     * Unions this dataset's rows with every dataset in {@code rhs}.
     *
     * <p>Each right-hand dataset is optionally sorted and tagged with a dataset id (when the
     * command requests sorting) before being appended. The combined RDD is persisted to disk,
     * optionally coalesced to the summed partition count of the right-hand datasets, and for
     * cursored unions sorted by dataset index, streaming flag and time.
     *
     * @param rhs       the datasets to append; all are expected to be {@code SparkDistributedDataset}s
     * @param ucmd      the union command (sources, sort/coalesce/cursored flags)
     * @param streaming whether the inputs are streaming; controls dataset id numbering and
     *                  whether the command sources are consulted
     * @return a new, unordered dataset over the combined rows
     */
    @Override
    public DistributedDataset union(List<DistributedDataset> rhs, UnionCommand ucmd, boolean streaming) {
        assert (rhs.size() > 1);
        int totalPartitions = 0;
        for (DistributedDataset other : rhs) {
            totalPartitions += ((SparkDistributedDataset)other).getSrRdd().getNumPartitions();
        }
        CommandNode[] sources = ucmd.getSources();
        // Streaming inputs all share id 1; non-streaming inputs are numbered from 2 upwards.
        int datasetId = streaming ? 1 : 2;
        int sourcesIndex = 0;
        if (!streaming && rhs.size() != sources.length) {
            // Line the right-hand datasets up with the trailing entries of the source list.
            sourcesIndex = sources.length - rhs.size();
        }
        DistributedDataset firstRhsSriDd = null;
        JavaRDD<SearchResult> combined = this.getSrRdd();
        for (DistributedDataset other : rhs) {
            SparkDistributedDataset srdd = (SparkDistributedDataset)other;
            JavaRDD<SearchResult> rdd = srdd.getSrRdd();
            if (firstRhsSriDd == null) {
                firstRhsSriDd = srdd.getsriDD();
            }
            if (ucmd.sort()) {
                boolean treatAsStreaming = streaming || sources[sourcesIndex].getCommandType() == CommandType.SP_STREAM;
                rdd = SparkDistributedDataset._sort_union_dataset(rdd, datasetId, treatAsStreaming, totalPartitions / rhs.size(), ucmd);
            }
            combined = this.jsc.union(new JavaRDD[]{combined, rdd});
            logger.debug((Object)("[rhs] fieldExtractionHint=" + srdd.fieldExtractionHint + ", reducedByFields=" + srdd.reducedByFields + ", partitioner=" + rdd.partitioner() + ", No. partition=" + rdd.getNumPartitions()));
            if (!streaming) {
                ++datasetId;
            }
            ++sourcesIndex;
        }
        combined = combined.persist(StorageLevel.DISK_ONLY());
        if (ucmd.coalesce()) {
            combined = combined.coalesce(totalPartitions);
            logger.info((Object)String.format("coalesced union dd to: %d", totalPartitions));
        }
        if (ucmd.cursored()) {
            IOrdering[] cursorOrder = new IOrdering[]{new SortNode("_dataset_index", SortOrder.ASC), new SortNode("_dataset_streaming", SortOrder.DESC), new SortNode("_time", SortOrder.DESC)};
            DistributedDataset sortedDd = SparkDistributedDataset._sort(this.engine, this.ctx, this.jsc, combined, this.sridd, null, cursorOrder, this.fieldExtractionHint, this.n, this.reducedByFields, -1);
            combined = ((SparkDistributedDataset)sortedDd).getSrRdd();
        }
        logger.debug((Object)("[union] fieldExtractionHint=" + this.fieldExtractionHint + ", reducedByFields=" + this.reducedByFields + ", partitioner=" + combined.partitioner() + ", No. partition=" + combined.getNumPartitions()));
        return new SparkDistributedDataset(this.engine, this.ctx, this.jsc, combined, firstRhsSriDd, FieldExtractor.ExtractionHint.NOT_PRECOMPUTED, -1L, this.reducedByFields, false);
    }

    /**
     * Creates a dataset with no rows (a single-partition RDD over an empty list).
     *
     * @param fieldExtractionHint the extraction hint for the new dataset
     * @return an unordered, row-less dataset with a fresh, empty reduce-by field set and limit {@code -1}
     */
    @Override
    public DistributedDataset empty(FieldExtractor.ExtractionHint fieldExtractionHint) {
        JavaRDD<SearchResult> noRows = this.jsc.parallelize(new ArrayList<SearchResult>(), 1);
        HashSet<SearchResult.FieldMeta> noReduceByFields = new HashSet<SearchResult.FieldMeta>();
        return new SparkDistributedDataset(this.engine, this.ctx, this.jsc, noRows, this.sridd, fieldExtractionHint, -1L, noReduceByFields, false);
    }

    /**
     * Selects the last {@code n} rows of this dataset by delegating to {@code _last}.
     *
     * @param n the number of rows passed through to {@code _last}
     * @return the dataset produced by {@code _last}
     */
    @Override
    public DistributedDataset last(long n) {
        // 'n' is the method argument here, not the instance field of the same name.
        return _last(engine, ctx, jsc, srRdd, sridd, n, fieldExtractionHint, reducedByFields, ordered);
    }

    /**
     * Builds a single-partition dataset holding at most the last {@code n} rows of {@code rdd}.
     *
     * <p>The work is done in four steps:
     * <ol>
     *   <li>every partition is trimmed to its own last {@code n} rows;</li>
     *   <li>each surviving row is keyed by {@code (partitionIndex, positionInPartition)};</li>
     *   <li>the keyed rows are sorted with {@link RowIdComparator}, which orders keys
     *       descending, so the last row of the highest-numbered partition comes first;</li>
     *   <li>the first {@code n} rows of that ordering are collected on the driver and
     *       re-parallelized into one partition.</li>
     * </ol>
     * The returned rows are therefore in the reverse of their original order.
     *
     * @param engine              engine handed to the resulting dataset
     * @param ctx                 context handed to the resulting dataset
     * @param sc                  Spark context used to re-parallelize the collected rows
     * @param rdd                 rows to take the tail of
     * @param sridd               passed through to the resulting dataset
     * @param n                   maximum number of rows to keep; must be greater than 0
     * @param fieldExtractionHint passed through to the resulting dataset
     * @param reducedByFields     passed through to the resulting dataset
     * @param ordered             passed through to the resulting dataset
     * @return a new dataset whose limit is {@code n}
     * @throws IllegalArgumentException if {@code n} is not greater than 0
     */
    private static DistributedDataset _last(ComputeEngine engine, ComputeEngineContext ctx, JavaSparkContext sc, JavaRDD<SearchResult> rdd, DistributedDataset sridd, final long n, FieldExtractor.ExtractionHint fieldExtractionHint, HashSet<SearchResult.FieldMeta> reducedByFields, boolean ordered) {
        if (n <= 0L) {
            throw new IllegalArgumentException("sdd last: limit value needs to be greater than 0.");
        }
        if (logger.isDebugEnabled()) {
            // rdd.count() is a Spark action that evaluates the whole lineage; only pay
            // for it when the debug line will actually be written.
            logger.debug((Object)String.format("Number of partitions: %d", rdd.getNumPartitions()));
            logger.debug((Object)String.format("Number of elements: %d", rdd.count()));
            logger.debug((Object)fieldExtractionHint.toString());
            logger.debug((Object)reducedByFields.toString());
        }
        Function2<Integer, Iterator<SearchResult>, Iterator<SearchResult>> extractLastN = new Function2<Integer, Iterator<SearchResult>, Iterator<SearchResult>>(){
            private static final long serialVersionUID = 1L;

            public Iterator<SearchResult> call(Integer splitIdx, Iterator<SearchResult> iterator) throws Exception {
                // Sliding window: never hold more than n rows of the partition at once.
                LinkedList<SearchResult> fifo = new LinkedList<SearchResult>();
                while (iterator.hasNext()) {
                    fifo.add(iterator.next());
                    if ((long)fifo.size() > n) {
                        fifo.removeFirst();
                    }
                }
                return fifo.iterator();
            }
        };
        rdd = rdd.mapPartitionsWithIndex((Function2)extractLastN, true);
        Function2<Integer, Iterator<SearchResult>, Iterator<Tuple2<Tuple2<Integer, Integer>, SearchResult>>> assignUniqueId = new Function2<Integer, Iterator<SearchResult>, Iterator<Tuple2<Tuple2<Integer, Integer>, SearchResult>>>(){
            private static final long serialVersionUID = 1L;

            public Iterator<Tuple2<Tuple2<Integer, Integer>, SearchResult>> call(final Integer splitIdx, final Iterator<SearchResult> iterator) throws Exception {
                return new Iterator<Tuple2<Tuple2<Integer, Integer>, SearchResult>>(){
                    // Position of the next row inside this partition.
                    int elementIdx = 0;

                    @Override
                    public boolean hasNext() {
                        return iterator.hasNext();
                    }

                    @Override
                    public Tuple2<Tuple2<Integer, Integer>, SearchResult> next() {
                        Tuple2 id = new Tuple2((Object)splitIdx, (Object)this.elementIdx);
                        ++this.elementIdx;
                        return new Tuple2((Object)id, iterator.next());
                    }

                    @Override
                    public void remove() {
                    }
                };
            }
        };
        JavaRDD dataWithUniqueIdRDD = rdd.mapPartitionsWithIndex((Function2)assignUniqueId, true);
        // Identity mapping whose only purpose is to obtain a JavaPairRDD so sortByKey is available.
        JavaPairRDD dataWithUniqueIdPairRDD = dataWithUniqueIdRDD.mapToPair((PairFunction)new PairFunction<Tuple2<Tuple2<Integer, Integer>, SearchResult>, Tuple2<Integer, Integer>, SearchResult>(){
            private static final long serialVersionUID = 1L;

            public Tuple2<Tuple2<Integer, Integer>, SearchResult> call(Tuple2<Tuple2<Integer, Integer>, SearchResult> row) throws Exception {
                return row;
            }
        });
        dataWithUniqueIdPairRDD = dataWithUniqueIdPairRDD.sortByKey((Comparator)new RowIdComparator());
        // take() accepts an int; clamp instead of letting a large long wrap to a negative value.
        int takeCount = (int)Math.min(n, (long)Integer.MAX_VALUE);
        List pairList = dataWithUniqueIdPairRDD.take(takeCount);
        ArrayList<SearchResult> elements = new ArrayList<SearchResult>(pairList.size());
        for (Object pair : pairList) {
            elements.add((SearchResult)((Tuple2)pair)._2());
        }
        rdd = sc.parallelize(elements, 1);
        return new SparkDistributedDataset(engine, ctx, sc, (JavaRDD<SearchResult>)rdd, sridd, fieldExtractionHint, n, reducedByFields, ordered);
    }

    /**
     * Returns a dataset with the results of this dataset in reverse order.
     *
     * <p>Forwards to {@code _reverse} with this instance's engine, context, Spark context,
     * backing RDD, sridd, extraction hint, limit, reduced-by fields and ordered flag.
     *
     * @return the dataset built by {@code _reverse}
     */
    @Override
    public DistributedDataset reverse() {
        return SparkDistributedDataset._reverse(this.engine, this.ctx, this.jsc, this.srRdd, this.sridd, this.fieldExtractionHint, this.n, this.reducedByFields, this.ordered);
    }

    /**
     * Produces a dataset whose rows are those of {@code rdd} in reverse order.
     *
     * <p>Every row is keyed by {@code (partitionIndex, positionInPartition)}, the keyed rows
     * are sorted with {@link RowIdComparator} (which orders keys descending), and the keys
     * are stripped again.
     *
     * @param engine              engine handed to the resulting dataset
     * @param ctx                 context handed to the resulting dataset
     * @param sc                  Spark context handed to the resulting dataset
     * @param rdd                 rows to reverse
     * @param sridd               passed through to the resulting dataset
     * @param fieldExtractionHint passed through to the resulting dataset
     * @param n                   passed through to the resulting dataset
     * @param reducedByFields     passed through to the resulting dataset
     * @param ordered             passed through to the resulting dataset
     * @return a new dataset over the reversed rows
     */
    private static DistributedDataset _reverse(ComputeEngine engine, ComputeEngineContext ctx, JavaSparkContext sc, JavaRDD<SearchResult> rdd, DistributedDataset sridd, FieldExtractor.ExtractionHint fieldExtractionHint, long n, HashSet<SearchResult.FieldMeta> reducedByFields, boolean ordered) {
        if (logger.isDebugEnabled()) {
            // rdd.count() is a Spark action that evaluates the whole lineage; only pay
            // for it when the debug line will actually be written.
            logger.debug((Object)String.format("Number of partitions: %d", rdd.getNumPartitions()));
            logger.debug((Object)String.format("Number of elements: %d", rdd.count()));
            logger.debug((Object)fieldExtractionHint.toString());
            logger.debug((Object)reducedByFields.toString());
        }
        Function2<Integer, Iterator<SearchResult>, Iterator<Tuple2<Tuple2<Integer, Integer>, SearchResult>>> assignUniqueId = new Function2<Integer, Iterator<SearchResult>, Iterator<Tuple2<Tuple2<Integer, Integer>, SearchResult>>>(){
            private static final long serialVersionUID = 1L;

            public Iterator<Tuple2<Tuple2<Integer, Integer>, SearchResult>> call(final Integer splitIdx, final Iterator<SearchResult> iterator) throws Exception {
                return new Iterator<Tuple2<Tuple2<Integer, Integer>, SearchResult>>(){
                    // Position of the next row inside this partition.
                    int elementIdx = 0;

                    @Override
                    public boolean hasNext() {
                        return iterator.hasNext();
                    }

                    @Override
                    public Tuple2<Tuple2<Integer, Integer>, SearchResult> next() {
                        Tuple2 id = new Tuple2((Object)splitIdx, (Object)this.elementIdx);
                        ++this.elementIdx;
                        return new Tuple2((Object)id, iterator.next());
                    }

                    @Override
                    public void remove() {
                    }
                };
            }
        };
        JavaRDD dataWithUniqueIdRDD = rdd.mapPartitionsWithIndex((Function2)assignUniqueId, true);
        // Identity mapping whose only purpose is to obtain a JavaPairRDD so sortByKey is available.
        JavaPairRDD dataWithUniqueIdPairRDD = dataWithUniqueIdRDD.mapToPair((PairFunction)new PairFunction<Tuple2<Tuple2<Integer, Integer>, SearchResult>, Tuple2<Integer, Integer>, SearchResult>(){
            private static final long serialVersionUID = 1L;

            public Tuple2<Tuple2<Integer, Integer>, SearchResult> call(Tuple2<Tuple2<Integer, Integer>, SearchResult> row) throws Exception {
                return row;
            }
        });
        dataWithUniqueIdPairRDD = dataWithUniqueIdPairRDD.sortByKey((Comparator)new RowIdComparator());
        FlatMapFunction<Iterator<Tuple2<Tuple2<Integer, Integer>, SearchResult>>, SearchResult> removeUniqueId = new FlatMapFunction<Iterator<Tuple2<Tuple2<Integer, Integer>, SearchResult>>, SearchResult>(){
            private static final long serialVersionUID = 1L;

            public Iterator<SearchResult> call(final Iterator<Tuple2<Tuple2<Integer, Integer>, SearchResult>> iterator) throws Exception {
                // Lazily unwraps each (id, row) pair back to the row itself.
                return new Iterator<SearchResult>(){

                    @Override
                    public boolean hasNext() {
                        return iterator.hasNext();
                    }

                    @Override
                    public SearchResult next() {
                        return (SearchResult)((Tuple2)iterator.next())._2();
                    }

                    @Override
                    public void remove() {
                    }
                };
            }
        };
        rdd = dataWithUniqueIdPairRDD.mapPartitions((FlatMapFunction)removeUniqueId, true);
        return new SparkDistributedDataset(engine, ctx, sc, (JavaRDD<SearchResult>)rdd, sridd, fieldExtractionHint, n, reducedByFields, ordered);
    }

    /**
     * Returns a copy of this dataset that uses the given sridd.
     *
     * <p>A new {@code SparkDistributedDataset} is created from this instance's engine,
     * context, Spark context, backing RDD, extraction hint, limit, reduced-by fields and
     * ordered flag; this instance is not modified.
     *
     * @param sridd the dataset to store as the copy's sridd
     * @return the new dataset
     */
    @Override
    public DistributedDataset setSriDD(DistributedDataset sridd) {
        return new SparkDistributedDataset(this.engine, this.ctx, this.jsc, this.srRdd, sridd, this.fieldExtractionHint, this.n, this.reducedByFields, this.ordered);
    }

    /**
     * Returns the sridd dataset held by this instance.
     *
     * @return the current value of the {@code sridd} field
     */
    @Override
    public DistributedDataset getsriDD() {
        return this.sridd;
    }

    /**
     * Applies {@code mapPartitioner} partition by partition and wraps the outcome in a new dataset.
     *
     * <p>When {@code mapPartitioner.repartition()} is true the input is first spread over ten
     * times its current partition count, counted, and then repartitioned to one partition
     * per 200000 rows (never fewer than one). When {@code includeParId} is non-negative only
     * the partition with that index is handed to the partitioner; every other partition
     * yields no rows.
     *
     * @param engine          engine handed to the resulting dataset
     * @param ctx             context handed to the resulting dataset
     * @param jsc             Spark context handed to the resulting dataset
     * @param srRdd           rows to transform
     * @param sridd           passed through to the resulting dataset
     * @param mapPartitioner  per-partition transformation; also supplies the extraction hint
     * @param n               passed through to the resulting dataset
     * @param reducedByFields passed through to the resulting dataset
     * @param includeParId    index of the only partition to process, or a negative value for all
     * @param ordered         passed through to the resulting dataset
     * @return a new dataset over the transformed rows
     */
    private static DistributedDataset _mapPartitions(ComputeEngine engine, ComputeEngineContext ctx, JavaSparkContext jsc, JavaRDD<SearchResult> srRdd, DistributedDataset sridd, final MapPartitioner mapPartitioner, long n, HashSet<SearchResult.FieldMeta> reducedByFields, final int includeParId, boolean ordered) {
        JavaRDD<SearchResult> input = srRdd;
        if (mapPartitioner.repartition()) {
            JavaRDD<SearchResult> spread = input.repartition(input.getNumPartitions() * 10);
            long rowCount = spread.count();
            int targetPartitions = (int)(rowCount / 200000L);
            if (targetPartitions < 1) {
                targetPartitions = 1;
            }
            input = spread.repartition(targetPartitions);
        }
        Function2<Integer, Iterator<SearchResult>, Iterator<SearchResult>> perPartition = new Function2<Integer, Iterator<SearchResult>, Iterator<SearchResult>>(){
            private static final long serialVersionUID = 1L;

            public Iterator<SearchResult> call(Integer parid, Iterator<SearchResult> srs) throws Exception {
                // A negative includeParId selects every partition.
                boolean selected = includeParId < 0 || parid == includeParId;
                if (selected) {
                    return mapPartitioner.mapPartitions(parid, srs);
                }
                return new ArrayList().iterator();
            }
        };
        JavaRDD mapped = input.mapPartitionsWithIndex((Function2)perPartition, false);
        return new SparkDistributedDataset(engine, ctx, jsc, (JavaRDD<SearchResult>)mapped, sridd, mapPartitioner.fieldExtractionHint(), n, reducedByFields, ordered);
    }

    /**
     * Applies {@code mapPartitioner} to every partition of this dataset.
     *
     * <p>Forwards to {@code _mapPartitions} with {@code -1} as the partition filter, which
     * that method treats as "no partition is excluded".
     *
     * @param mapPartitioner the per-partition transformation
     * @return the dataset built by {@code _mapPartitions}
     */
    @Override
    public DistributedDataset transform(MapPartitioner mapPartitioner) {
        return SparkDistributedDataset._mapPartitions(this.engine, this.ctx, this.jsc, this.srRdd, this.sridd, mapPartitioner, this.n, this.reducedByFields, -1, this.ordered);
    }

    /**
     * Applies {@code mapPartitioner} to this dataset, optionally restricted to one partition.
     *
     * <p>Forwards to {@code _mapPartitions}, which processes only the partition whose index
     * equals {@code includeParId} when that value is non-negative and all partitions otherwise.
     *
     * @param mapPartitioner the per-partition transformation
     * @param includeParId   index of the only partition to process, or a negative value for all
     * @return the dataset built by {@code _mapPartitions}
     */
    @Override
    public DistributedDataset transform(MapPartitioner mapPartitioner, int includeParId) {
        return SparkDistributedDataset._mapPartitions(this.engine, this.ctx, this.jsc, this.srRdd, this.sridd, mapPartitioner, this.n, this.reducedByFields, includeParId, this.ordered);
    }

    /**
     * Invokes {@code actor} on every result of this dataset.
     *
     * <p>Forwards to {@code _foreach} with the backing RDD.
     *
     * @param actor the action to run for each result
     */
    @Override
    public void foreach(Actor actor) {
        SparkDistributedDataset._foreach(this.srRdd, actor);
    }

    /**
     * Calls {@code actor.act} for each element of {@code srrdd} through {@code JavaRDD.foreach}.
     *
     * @param srrdd the rows to visit
     * @param actor the action applied to each row; it is captured by the function object
     *              handed to Spark
     */
    private static void _foreach(JavaRDD<SearchResult> srrdd, final Actor actor) {
        srrdd.foreach((VoidFunction)new VoidFunction<SearchResult>(){
            private static final long serialVersionUID = 1L;

            public void call(SearchResult sr) throws Exception {
                actor.act(sr);
            }
        });
    }

    /**
     * Replaces the compute-engine context held by this dataset.
     *
     * <p>Unlike the other setters visible here, this one mutates the instance in place
     * rather than returning a copy.
     *
     * @param ctx the new context
     */
    @Override
    public void setContext(ComputeEngineContext ctx) {
        this.ctx = ctx;
    }

    /**
     * Fetches results from the backing RDD by delegating to {@code JavaRDD.take(numSrs)}.
     *
     * @param numSrs the number of results requested
     * @return the list returned by {@code take}
     */
    @Override
    public List<SearchResult> retrieve(int numSrs) {
        return this.srRdd.take(numSrs);
    }

    /**
     * Applies the given dedup command to this dataset.
     *
     * <p>Forwards to {@code _dedup} with this instance's engine, context, Spark context,
     * backing RDD, sridd, extraction hint and ordered flag.
     *
     * @param dedupComand     the dedup command to apply
     * @param boostPartitions forwarded unchanged to {@code _dedup}
     * @return the dataset built by {@code _dedup}
     */
    @Override
    public DistributedDataset dedup(DedupCommand dedupComand, boolean boostPartitions) {
        return SparkDistributedDataset._dedup(this.engine, this.ctx, this.jsc, this.srRdd, this.sridd, this.fieldExtractionHint, dedupComand, boostPartitions, this.ordered);
    }

    /**
     * Pairs a result with a key result built from the values of {@code byFields}.
     *
     * <p>For every entry of {@code byFields} the key receives the value {@code sr} holds for
     * that field, or a {@code NullField(false)} when the value is missing. Values of fields
     * that are also named in {@code sortFields} are converted to the field type declared by
     * the {@code byFields} entry. When {@code byFields} is {@code null} the key is empty.
     *
     * @param sr         the result to derive the key from
     * @param byFields   orderings naming the key fields, in key order; may be {@code null}
     * @param sortFields orderings whose fields need type conversion; may be {@code null}
     * @return a pair of (key result, {@code sr})
     */
    private static Pair<SearchResult, SearchResult> createDedupSplit(SearchResult sr, ArrayList<IOrdering> byFields, IOrdering[] sortFields) {
        HashSet<SearchResult.FieldMeta> convertibleFields = new HashSet<SearchResult.FieldMeta>();
        if (sortFields != null) {
            for (IOrdering ordering : sortFields) {
                convertibleFields.add(SearchResult.FieldMeta.newFieldMeta(ordering.getField().getFieldName()));
            }
        }
        SearchResult.SRHashMap<SearchResult.FieldMeta, Object> keyData;
        SearchResult.FieldMeta[] keyNames;
        Object[] keyValues;
        if (byFields == null) {
            keyData = new SearchResult.SRHashMap<SearchResult.FieldMeta, Object>(2);
            keyNames = new SearchResult.FieldMeta[0];
            keyValues = new Object[0];
        } else {
            int fieldCount = byFields.size();
            // Sized so that fieldCount entries fit under a 0.75 load factor.
            keyData = new SearchResult.SRHashMap<SearchResult.FieldMeta, Object>((int)((double)fieldCount / 0.75 + 1.0));
            keyNames = new SearchResult.FieldMeta[fieldCount];
            keyValues = new Object[fieldCount];
            for (int pos = 0; pos < fieldCount; ++pos) {
                IOrdering ordering = byFields.get(pos);
                SearchResult.FieldMeta meta = SearchResult.FieldMeta.newFieldMeta(ordering.getField().getFieldName());
                Object value = sr.getFieldValue(meta);
                keyNames[pos] = meta;
                if (value == null) {
                    value = new SearchResultFactory.NullField(false);
                }
                if (convertibleFields.contains(meta)) {
                    FieldType targetType = ordering.getFieldType();
                    value = SearchResultFactory.getInstance().convertValue(value, targetType);
                }
                keyValues[pos] = value;
                keyData.put(meta, value);
            }
        }
        SearchResult key = SearchResultFactory.getInstance().createSearchResult(keyData, keyNames, keyValues);
        return new Pair<SearchResult, SearchResult>(key, sr);
    }

    /**
     * Tells whether {@code sr} carries a non-null value with a non-empty string form for
     * every dedup field.
     *
     * @param sr               the result to inspect
     * @param dedupFields      the dedup fields as a set, checked against the result's field names
     * @param dedupFieldsArray the same fields as an array, used to look the values up
     * @return {@code true} when every dedup field is present and non-empty
     */
    private static boolean hasNonEmptyDedupValues(SearchResult sr, HashSet<SearchResult.FieldMeta> dedupFields, SearchResult.FieldMeta[] dedupFieldsArray) {
        if (!Arrays.asList(sr.getFieldNames()).containsAll(dedupFields)) {
            return false;
        }
        SearchResult.SRHashMap<SearchResult.FieldMeta, Object> data = sr.getDataMap();
        for (int i = 0; i < dedupFieldsArray.length; ++i) {
            Object value = ((HashMap)data).get(dedupFieldsArray[i]);
            if (value == null || value.toString().isEmpty()) {
                return false;
            }
        }
        return true;
    }

    /**
     * Applies a dedup command: keeps at most {@code limit} results per distinct combination
     * of the dedup field values.
     *
     * <p>Outline:
     * <ol>
     *   <li>Build the sort order used inside each partition: the dedup fields first, then the
     *       command's sort-by orderings (a sort-by ordering replaces an earlier entry for the
     *       same field), then {@code _time} descending when {@code boostPartitions} is set
     *       and {@code _time} is not already part of the order.</li>
     *   <li>Unless {@code keepempty} or {@code keepevents} is set, drop results that lack a
     *       non-empty value for any dedup field.</li>
     *   <li>Key every result with {@code createDedupSplit}, partition by
     *       {@link DedupPartitioner} and sort within partitions.</li>
     *   <li>Walk each partition and stamp every result with a temporary {@code _dedupcount}
     *       field holding its position inside its run of equal dedup keys; with
     *       {@code keepempty}, results lacking a dedup value are stamped 0 so they always
     *       survive.</li>
     *   <li>Keep results whose stamp is below {@code limit} (removing the stamp). With
     *       {@code keepevents} the remaining results are kept too, with their dedup fields
     *       replaced by the empty string field.</li>
     *   <li>Sort the outcome by the command's sort-by orderings if there are any, otherwise
     *       by {@code _time} descending when {@code boostPartitions} is set.</li>
     * </ol>
     *
     * @param engine              engine handed to the resulting dataset
     * @param ctx                 context; read for {@code "externalSource"} and
     *                            {@code "execution.hints"} when {@code boostPartitions} is set
     * @param sc                  Spark context handed to the resulting dataset
     * @param rdd                 results to deduplicate
     * @param sridd               passed through to the resulting dataset
     * @param fieldExtractionHint not used by this method; results are always returned with
     *                            {@code NOT_PRECOMPUTED}
     * @param dedupCommand        supplies the limit, flags, dedup fields and sort-by orderings
     * @param boostPartitions     enables partition-count tuning and the {@code _time} ordering
     * @param ordered             ordered flag for the result when no final sort is applied
     * @return the deduplicated dataset
     */
    private static DistributedDataset _dedup(ComputeEngine engine, ComputeEngineContext ctx, JavaSparkContext sc, JavaRDD<SearchResult> rdd, DistributedDataset sridd, FieldExtractor.ExtractionHint fieldExtractionHint, DedupCommand dedupCommand, boolean boostPartitions, boolean ordered) {
        final int limit = dedupCommand.getLimit();
        final boolean keepempty = dedupCommand.isKeepEmpty();
        final boolean keepevents = dedupCommand.isKeepEvents();
        FieldNode[] fields = dedupCommand.getFields();
        final LinkedHashSet<SearchResult.FieldMeta> dedupFields = new LinkedHashSet<SearchResult.FieldMeta>();
        for (FieldNode field : fields) {
            dedupFields.add(SearchResult.FieldMeta.newFieldMeta(field.getFieldName()));
        }
        logger.info((Object)String.format("dedup fields: %s", dedupFields.toString()));
        final IOrdering[] sortByFields = dedupCommand.getSortBy();
        LinkedHashSet<SearchResult.FieldMeta> dedupSortFieldsSet = new LinkedHashSet<SearchResult.FieldMeta>();
        SortNode defaultSortField = new SortNode("_time", SortOrder.DESC);
        final SearchResult.FieldMeta[] dedupFieldsArray = dedupFields.toArray(new SearchResult.FieldMeta[dedupFields.size()]);
        final ArrayList<IOrdering> dedupSortFields = new ArrayList<IOrdering>();
        for (int i = 0; i < dedupFieldsArray.length; ++i) {
            dedupSortFields.add(new SortNode(dedupFieldsArray[i].fieldName()));
            dedupSortFieldsSet.add(dedupFieldsArray[i]);
        }
        logger.info((Object)String.format("dedup sort fields: %s", dedupSortFieldsSet));
        if (sortByFields != null) {
            for (int i = 0; i < sortByFields.length; ++i) {
                String sortByName = sortByFields[i].getField().getFieldName();
                // A sort-by ordering supersedes any earlier ordering on the same field:
                // drop the first match and append the sort-by ordering at the end.
                for (int j = 0; j < dedupSortFields.size(); ++j) {
                    if (dedupSortFields.get(j).getField().getFieldName().equals(sortByName)) {
                        dedupSortFields.remove(j);
                        break;
                    }
                }
                dedupSortFields.add(sortByFields[i]);
                dedupSortFieldsSet.add(SearchResult.FieldMeta.newFieldMeta(sortByName));
            }
        }
        if (!dedupSortFieldsSet.contains(SearchResult.FieldMeta.newFieldMeta("_time")) && boostPartitions) {
            dedupSortFields.add(defaultSortField);
            dedupSortFieldsSet.add(SearchResult.FieldMeta.newFieldMeta(defaultSortField.getField().getFieldName()));
        }
        logger.info((Object)String.format("final dedup sort fields set: %s, dedup sort fields: %s", dedupSortFieldsSet, dedupSortFields));
        IOrdering[] dedupSortFieldsArray = dedupSortFields.toArray(new IOrdering[dedupSortFields.size()]);
        if (!keepempty && !keepevents) {
            rdd = rdd.filter((Function)new Function<SearchResult, Boolean>(){
                private static final long serialVersionUID = 1L;

                public Boolean call(SearchResult sr) {
                    return SparkDistributedDataset.hasNonEmptyDedupValues(sr, dedupFields, dedupFieldsArray);
                }
            });
        }
        JavaPairRDD srPairRdd = rdd.mapToPair((PairFunction)new PairFunction<SearchResult, SearchResult, SearchResult>(){
            private static final long serialVersionUID = 1L;

            public Tuple2<SearchResult, SearchResult> call(SearchResult sr) throws Exception {
                Pair pair = SparkDistributedDataset.createDedupSplit(sr, dedupSortFields, sortByFields);
                return new Tuple2(pair.first(), pair.second());
            }
        });
        int numPars = srPairRdd.getNumPartitions();
        if (boostPartitions) {
            String baseTransformer = (String)ctx.get("externalSource");
            // Constant-first equals: a context without "externalSource" takes the general
            // path instead of failing with a NullPointerException.
            if ("NSPTransformer".equals(baseTransformer)) {
                numPars = srPairRdd.getNumPartitions() * 2;
            } else {
                srPairRdd = srPairRdd.partitionBy((Partitioner)new HashPartitioner(srPairRdd.getNumPartitions()));
                long count = srPairRdd.count();
                int numFields = fields.length;
                ExecutionHints hints = (ExecutionHints)ctx.get("execution.hints");
                numPars = hints.defaultPartitionCount(count, numFields);
            }
        }
        JavaPairRDD sortedSrPairRdd = srPairRdd.repartitionAndSortWithinPartitions((Partitioner)new DedupPartitioner(numPars, dedupFields), (Comparator)new SearchResultComparator(dedupSortFieldsArray));
        JavaRDD markedForDedupRdd = sortedSrPairRdd.mapPartitionsWithIndex((Function2)new Function2<Integer, Iterator<Tuple2<SearchResult, SearchResult>>, Iterator<SearchResult>>(){
            private static final long serialVersionUID = 1L;

            public Iterator<SearchResult> call(Integer partitionId, final Iterator<Tuple2<SearchResult, SearchResult>> srList) throws Exception {
                return new Iterator<SearchResult>(){
                    // One comparator per partition rather than one per record.
                    private final SearchResultComparator groupComparator = new SearchResultComparator(dedupFieldsArray, SortOrder.DESC);
                    // Number of results already seen in the current run of equal dedup keys.
                    private int count = 0;
                    // Key that started the current run.
                    private SearchResult prevSr = null;

                    @Override
                    public boolean hasNext() {
                        return srList.hasNext();
                    }

                    @Override
                    public SearchResult next() {
                        Tuple2 kvSr = (Tuple2)srList.next();
                        SearchResult srKey = (SearchResult)kvSr._1;
                        SearchResult sr = (SearchResult)kvSr._2;
                        if (srKey == null || this.prevSr == null || this.groupComparator.compare(srKey, this.prevSr) != 0) {
                            this.prevSr = srKey;
                            this.count = 0;
                        }
                        SearchResult.SRHashMap<SearchResult.FieldMeta, Object> data = sr.getDataMap();
                        ((HashMap)data).put(SearchResult.FieldMeta.newFieldMeta("_dedupcount"), this.count);
                        if (keepempty && !SparkDistributedDataset.hasNonEmptyDedupValues(sr, dedupFields, dedupFieldsArray)) {
                            // Results lacking a dedup value are always kept: stamp them 0.
                            ((HashMap)data).put(SearchResult.FieldMeta.newFieldMeta("_dedupcount"), 0);
                        }
                        ++this.count;
                        return sr;
                    }

                    @Override
                    public void remove() {
                    }
                };
            }
        }, true);
        rdd = markedForDedupRdd.filter((Function)new Function<SearchResult, Boolean>(){
            private static final long serialVersionUID = 1L;

            public Boolean call(SearchResult sr) {
                SearchResult.SRHashMap<SearchResult.FieldMeta, Object> data = sr.getDataMap();
                if ((Integer)((HashMap)data).get(SearchResult.FieldMeta.newFieldMeta("_dedupcount")) < limit) {
                    ((HashMap)data).remove(SearchResult.FieldMeta.newFieldMeta("_dedupcount"));
                    return true;
                }
                if (keepevents) {
                    // Keep the duplicate but blank out its dedup fields, both through
                    // updateField and directly in the result's value array.
                    for (int i = 0; i < dedupFieldsArray.length; ++i) {
                        sr.updateField(dedupFieldsArray[i], SearchResultFactory.createStringField(ComputeEngineConstants.EMPTY_STRING_FIELD));
                    }
                    SearchResult.FieldMeta[] keys = sr.getFieldNames();
                    Object[] values = sr.getFieldValues();
                    for (int i = 0; i < keys.length; ++i) {
                        if (!dedupFields.contains(keys[i])) continue;
                        values[i] = SearchResultFactory.createStringField(ComputeEngineConstants.EMPTY_STRING_FIELD);
                    }
                    return true;
                }
                return false;
            }
        });
        if (sortByFields != null && sortByFields.length > 0) {
            return SparkDistributedDataset._sort(engine, ctx, sc, (JavaRDD<SearchResult>)rdd, sridd, null, sortByFields, FieldExtractor.ExtractionHint.NOT_PRECOMPUTED, limit, dedupFields, -1);
        }
        if (boostPartitions) {
            IOrdering[] sortFields = new IOrdering[]{new SortNode("_time", SortOrder.DESC)};
            return SparkDistributedDataset._sort(engine, ctx, sc, (JavaRDD<SearchResult>)rdd, sridd, null, sortFields, FieldExtractor.ExtractionHint.NOT_PRECOMPUTED, limit, dedupFields, -1);
        }
        return new SparkDistributedDataset(engine, ctx, sc, (JavaRDD<SearchResult>)rdd, sridd, FieldExtractor.ExtractionHint.NOT_PRECOMPUTED, limit, dedupFields, ordered);
    }

    /**
     * Returns the ordered flag this dataset was created with.
     *
     * @return the value of the {@code ordered} field
     */
    @Override
    public boolean ordered() {
        return this.ordered;
    }

    /**
     * Returns a copy of this dataset that carries the given extraction hint.
     *
     * <p>A new {@code SparkDistributedDataset} is created from this instance's engine,
     * context, Spark context, backing RDD, sridd, limit, reduced-by fields and ordered
     * flag; this instance is not modified.
     *
     * @param extractionHint the hint to store on the copy
     * @return the new dataset
     */
    @Override
    public DistributedDataset setExtractionHint(FieldExtractor.ExtractionHint extractionHint) {
        return new SparkDistributedDataset(this.engine, this.ctx, this.jsc, this.srRdd, this.sridd, extractionHint, this.n, this.reducedByFields, this.ordered);
    }

    /**
     * Reduces the number of partitions of the backing RDD without a shuffle.
     *
     * <p>Nothing happens when {@code numPars} is not strictly lower than the current
     * partition count: the request is logged and this instance is returned as is.
     *
     * @param numPars the desired partition count
     * @return this dataset when no coalescing is needed, otherwise a new dataset over the
     *         coalesced RDD with all other attributes copied from this one
     */
    @Override
    public DistributedDataset coalesce(int numPars) {
        final int existing = this.srRdd.getNumPartitions();
        if (existing > numPars) {
            JavaRDD shrunk = this.srRdd.coalesce(numPars, false);
            return new SparkDistributedDataset(this.engine, this.ctx, this.jsc, (JavaRDD<SearchResult>)shrunk, this.sridd, this.fieldExtractionHint, this.n, this.reducedByFields, this.ordered);
        }
        logger.info((Object)String.format("sdd coallesce: will not coallesce since the requested partition count higher than the current count: %d, requested: %d", existing, numPars));
        return this;
    }

    /**
     * Returns the compute engine associated with this dataset.
     *
     * @return the value of the {@code engine} field
     */
    @Override
    public ComputeEngine engine() {
        return this.engine;
    }

    /**
     * Sets the name of the backing RDD.
     *
     * <p>The name is applied to the existing RDD and this same instance is returned; no
     * copy is made.
     *
     * @param name the name to give the backing RDD
     * @return this dataset
     */
    @Override
    public DistributedDataset setName(String name) {
        this.srRdd.setName(name);
        return this;
    }

    /**
     * Partitioner that assigns a key result to a partition from a hash of the values it
     * holds for the dedup fields, so keys with equal dedup values hash to the same partition.
     */
    public static class DedupPartitioner
    extends Partitioner
    implements Serializable {
        private static final long serialVersionUID = 1L;
        private int partitions;
        private HashSet<SearchResult.FieldMeta> dedupFieldsSet;

        /**
         * @param maxPartitions number of partitions this partitioner distributes keys over
         * @param dedupFields   fields whose values feed the partitioning hash
         */
        public DedupPartitioner(int maxPartitions, HashSet<SearchResult.FieldMeta> dedupFields) {
            this.dedupFieldsSet = dedupFields;
            this.partitions = maxPartitions;
        }

        /**
         * Combines the hash codes of the values {@code sr} holds for {@code byFields},
         * visiting the fields in the set's iteration order.
         *
         * <p>The combination is {@code hash = hash * 31 + valueHash}, starting from 1. A
         * missing value is replaced by {@code ComputeEngineConstants.EMPTY_STRING_FIELD}.
         *
         * @param sr       the result whose values are hashed
         * @param byFields the fields to hash; {@code null} yields 1
         * @return the combined hash
         */
        public int getPartitioningKeyHash(SearchResult sr, HashSet<SearchResult.FieldMeta> byFields) {
            if (byFields == null) {
                return 1;
            }
            int hash = 1;
            for (SearchResult.FieldMeta field : byFields) {
                Object value = sr.getFieldValue(field);
                if (value == null) {
                    value = ComputeEngineConstants.EMPTY_STRING_FIELD;
                }
                int valueHash = value == null ? 0 : value.hashCode();
                hash = 31 * hash + valueHash;
            }
            return hash;
        }

        /**
         * Maps a key to a partition index.
         *
         * @param key the key, which must be a {@code SearchResult}
         * @return the absolute value of the key hash modulo the partition count
         */
        public int getPartition(Object key) {
            int hash = this.getPartitioningKeyHash((SearchResult)key, this.dedupFieldsSet);
            int bucket = hash % this.partitions;
            return bucket < 0 ? -bucket : bucket;
        }

        /**
         * @return the partition count given at construction
         */
        public int numPartitions() {
            return this.partitions;
        }
    }

    /**
     * Orders row ids of the form {@code (first, second)} in descending order: larger first
     * components sort earlier, and ties are broken by larger second components.
     */
    private static class RowIdComparator
    implements Comparator<Tuple2<Integer, Integer>>,
    Serializable {
        private static final long serialVersionUID = 1L;

        private RowIdComparator() {
        }

        /**
         * @return 1 when {@code rhs} is greater than {@code lhs}, -1 when it is smaller,
         *         0 when both components are equal
         */
        @Override
        public int compare(Tuple2<Integer, Integer> lhs, Tuple2<Integer, Integer> rhs) {
            int byFirst = RowIdComparator.descending((Integer)lhs._1(), (Integer)rhs._1());
            if (byFirst != 0) {
                return byFirst;
            }
            return RowIdComparator.descending((Integer)lhs._2(), (Integer)rhs._2());
        }

        /** Reverse three-way comparison of two ints: positive when {@code right} is larger. */
        private static int descending(int left, int right) {
            if (right > left) {
                return 1;
            }
            if (right < left) {
                return -1;
            }
            return 0;
        }
    }
}

