// InNumBatchesCollector.java

/*
 * Copyright 2020 Collect Utils Authors
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package net.morimekta.collect.collectors;

import net.morimekta.collect.UnmodifiableList;

import java.util.ArrayList;
import java.util.List;
import java.util.Set;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.function.BiConsumer;
import java.util.function.BinaryOperator;
import java.util.function.Function;
import java.util.function.Supplier;
import java.util.stream.Collector;

import static net.morimekta.collect.UnmodifiableList.toList;
import static net.morimekta.collect.UnmodifiableSet.setOf;

/**
 * Collector that deals the collected items round-robin into a fixed number
 * of batches: the first item goes to batch 0, the second to batch 1, and so
 * on, wrapping around after {@code numBatches} items.
 * <p>
 * If fewer items than {@code numBatches} were collected, the result only
 * contains as many batches as there were items (one item each), so no empty
 * batch is returned.
 * <p>
 * The collector instance itself holds no mutable state. All bookkeeping
 * lives in the container created by {@link #supplier()}, so one instance can
 * be reused for several {@code collect} calls, or used as a downstream
 * collector where several containers are alive at the same time. The
 * accumulator, combiner and finisher must only be handed containers created
 * by this collector's own supplier.
 *
 * @param <T> The item type.
 */
public class InNumBatchesCollector<T> implements Collector<T, List<List<T>>, List<List<T>>> {
    private final int numBatches;

    /**
     * @param numBatches Number of batches to distribute the items over.
     *                   A negative value makes the supplier fail with
     *                   {@link IllegalArgumentException}; zero makes the
     *                   accumulator fail with {@link ArithmeticException}
     *                   on the first item.
     */
    public InNumBatchesCollector(int numBatches) {
        this.numBatches = numBatches;
    }

    /**
     * @return Supplier of a fresh container holding {@code numBatches} empty
     *         batches and its own item counter.
     */
    @Override
    public Supplier<List<List<T>>> supplier() {
        return () -> new Batches<>(numBatches);
    }

    /**
     * @return Function adding one item to the next batch in the rotation of
     *         the given container.
     */
    @Override
    public BiConsumer<List<List<T>>, T> accumulator() {
        return (batches, item) -> ((Batches<T>) batches).append(item);
    }

    /**
     * @return Function merging the second container into the first. The
     *         items of the second container are replayed in the order they
     *         were added to it, continuing the rotation of the first, so the
     *         merged layout equals accumulating (a1..an) + (b1..bm) into a
     *         single container.
     */
    @Override
    public BinaryOperator<List<List<T>>> combiner() {
        return (a, b) -> {
            ((Batches<T>) a).appendAll((Batches<T>) b);
            return a;
        };
    }

    /**
     * @return Function converting the container to the final result, after
     *         dropping the trailing batches that never received an item.
     */
    @Override
    public Function<List<List<T>>, List<List<T>>> finisher() {
        return batches -> {
            int total = ((Batches<T>) batches).total;
            // With fewer items than batches only the first 'total' batches
            // got an item (one each); the remaining ones are empty.
            List<List<T>> used = total < numBatches
                                 ? batches.subList(0, total)
                                 : batches;
            return used.stream()
                       .map(UnmodifiableList::asList)
                       .collect(toList());
        };
    }

    @Override
    public Set<Characteristics> characteristics() {
        return setOf(Characteristics.UNORDERED);
    }

    /**
     * Accumulation container: the list of batches plus the number of items
     * added so far. Items are always laid out round-robin starting at batch
     * 0, so the item added as number {@code k} (0-based) is found at
     * {@code get(k % size()).get(k / size())}.
     *
     * @param <E> The item type.
     */
    private static final class Batches<E> extends ArrayList<List<E>> {
        private static final long serialVersionUID = 1L;

        /** Number of items added to this container so far. */
        private int total;

        Batches(int numBatches) {
            super(numBatches);
            for (int i = 0; i < numBatches; ++i) {
                add(new ArrayList<>());
            }
        }

        /** Add a single item to the next batch in the rotation. */
        void append(E item) {
            get(total % size()).add(item);
            ++total;
        }

        /** Add all items of the other container, in their insertion order. */
        void appendAll(Batches<E> other) {
            // Snapshot the count up front so merging a container with
            // itself terminates.
            int count = other.total;
            int width = other.size();
            for (int k = 0; k < count; ++k) {
                append(other.get(k % width).get(k / width));
            }
        }
    }
}