Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
*/
package org.apache.parquet;

import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicBoolean;
import org.apache.parquet.SemanticVersion.SemanticVersionParseException;
import org.apache.parquet.VersionParser.ParsedVersion;
Expand All @@ -35,6 +36,8 @@
*/
public class CorruptStatistics {
private static final AtomicBoolean alreadyLogged = new AtomicBoolean(false);
private static final ConcurrentHashMap<String, Boolean> CACHE = new ConcurrentHashMap<>();
private static final int MAX_CACHE_SIZE = 64;

private static final Logger LOG = LoggerFactory.getLogger(CorruptStatistics.class);

Expand Down Expand Up @@ -68,6 +71,12 @@ public static boolean shouldIgnoreStatistics(String createdBy, PrimitiveTypeName
return true;
}

return CACHE.size() >= MAX_CACHE_SIZE
? shouldIgnoreStatisticsForBinary(createdBy)
: CACHE.computeIfAbsent(createdBy, CorruptStatistics::shouldIgnoreStatisticsForBinary);
}

private static boolean shouldIgnoreStatisticsForBinary(String createdBy) {
try {
ParsedVersion version = VersionParser.parse(createdBy);

Expand Down Expand Up @@ -114,4 +123,14 @@ private static void warnOnce(String message) {
LOG.warn(message);
}
}

// Visible for testing
static int cacheSize() {
return CACHE.size();
}

// Visible for testing
static void clearCache() {
CACHE.clear();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,21 @@
*/
package org.apache.parquet;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;

import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName;
import org.junit.Before;
import org.junit.Test;

public class CorruptStatisticsTest {

@Before
public void setUp() {
CorruptStatistics.clearCache();
}

@Test
public void testOnlyAppliesToBinary() {
assertTrue(CorruptStatistics.shouldIgnoreStatistics(
Expand Down Expand Up @@ -124,4 +131,49 @@ public void testDistributionCorruptStatistics() {
assertTrue(CorruptStatistics.shouldIgnoreStatistics(
"parquet-mr version 1.7.0 (build abcd)", PrimitiveTypeName.BINARY));
}

@Test
public void testCachingBehavior() {
assertEquals(0, CorruptStatistics.cacheSize());

// Call many times with the same createdBy — cache should store exactly one entry
String createdBy = "parquet-mr version 1.6.0 (build cache-test)";
for (int i = 0; i < 100; i++) {
CorruptStatistics.shouldIgnoreStatistics(createdBy, PrimitiveTypeName.BINARY);
}
assertEquals(1, CorruptStatistics.cacheSize());

// A different createdBy should add a second entry
CorruptStatistics.shouldIgnoreStatistics("parquet-mr version 1.9.0 (build abcd)", PrimitiveTypeName.BINARY);
assertEquals(2, CorruptStatistics.cacheSize());
}

@Test
public void testCorrectnessWhenCacheIsFull() {
CorruptStatistics.clearCache();

// Fill cache to capacity with 64 distinct corrupt-version strings
for (int i = 0; i < 64; i++) {
assertTrue(
"Corrupt version should be ignored",
CorruptStatistics.shouldIgnoreStatistics(
"parquet-mr version 1.6." + i + " (build x)", PrimitiveTypeName.BINARY));
}
assertEquals(64, CorruptStatistics.cacheSize());

// 65th distinct string bypasses cache -- must still return correct result
assertTrue(
"Cache-bypass path must still return correct result for corrupt version",
CorruptStatistics.shouldIgnoreStatistics(
"parquet-mr version 1.6.99 (build bypass)", PrimitiveTypeName.BINARY));

// Non-corrupt version also returns correct result when cache is full
assertFalse(
"Non-corrupt version must return false even when cache is full",
CorruptStatistics.shouldIgnoreStatistics(
"parquet-mr version 1.10.0 (build bypass2)", PrimitiveTypeName.BINARY));

// Cache did not grow beyond cap
assertEquals(64, CorruptStatistics.cacheSize());
}
}