Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
575 changes: 575 additions & 0 deletions TestTODO.md

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -23,82 +23,98 @@ public class ParameterUtils {
* This method is used by {@link StateNode#fromXML(Node)} to restore a parameter from its
* serialized state-file string, and must stay consistent with {@link #paramToString(StateNode)}.
* <p>
* In BEAST3, bounds are derived from the parameter's domain and are never written to the
* state file. The expected format is therefore always bound-free:
* The XML node's {@code id} attribute is set on {@code param} directly; the node's text
* content is the full {@link #paramToString(StateNode)} output, which still begins with the
* parameter ID. In BEAST 3, bounds are derived from the domain at runtime and are never
* written to the state file. Expected formats:
* <ul>
* <li>scalar: {@code kappa: 29}</li>
* <li>vector: {@code freqs{4}: 0.25 0.25 0.25 0.25}</li>
* <li>boolean scalar: {@code isEstimated: true}</li>
* <li>boolean vector: {@code isSelected{2}: true false}</li>
* </ul>
* A state file entry that still contains explicit bounds (BEAST2 legacy format such as
* {@code kappa{[0.0,Infinity]}: 29}) is rejected with {@link IllegalArgumentException}.
* A state file entry in the BEAST 2 format — where the dimension and explicit bounds are
* embedded as {@code kappa[1 1] (0.0,Infinity): 29} — is rejected with
* {@link IllegalArgumentException}.
*
* @param node XML node whose text content is the serialized parameter string
* @param param the target {@link StateNode} to restore
* @throws IllegalArgumentException if the string contains legacy explicit bounds
* @throws IllegalArgumentException if the string matches the BEAST 2 bounded parameter format
* @throws RuntimeException if the string format is unrecognised
*/
public static void parseParameter(final Node node, StateNode param) {

final NamedNodeMap atts = node.getAttributes();
param.setID(atts.getNamedItem("id").getNodeValue());
final String id = atts.getNamedItem("id").getNodeValue();
param.setID(id);
final String str = node.getTextContent();

// Explicit bounds in state files are a BEAST2 legacy format.
// In BEAST3, bounds are derived from the domain (see BoundedParam removal).
// Fail fast so the user knows to restart rather than resume from such a file.
Pattern boundedPattern = Pattern.compile("^.*" +
"\\{" + "(?:(\\d+|\\[\\d+,\\s*\\d+\\]),\\s*)?" +
"[\\[\\(](.*),(.*)[\\]\\)]" + "\\}" +
":\\s*(.*)\\s*$");
if (boundedPattern.matcher(str).matches()) {
throw new IllegalArgumentException(
"XML file entry '" + str + "' contains explicit bounds, which are not " +
"supported in BEAST3. Bounds are now derived from the parameter domain; " +
"values can be constrained further using a prior distribution.");
// beast2 cases: 1. hky.frequencies[4 1] (-Infinity,Infinity): 0.2 0.2 0.2 0.4
// 2. hky.kappa[1 1] (0.0,Infinity): 5.0
Pattern b2pattern1 = Pattern.compile("^.*\\[(.*) (.*)\\].*\\((.*),(.*)\\):\\s*(.*)\\s*$");
Pattern b2pattern2 = Pattern.compile(".*\\[(.*)\\].*\\((.*),(.*)\\):\\s*(.*)\\s*$");
if (b2pattern1.matcher(str).matches() || b2pattern2.matcher(str).matches()) {
throw new IllegalArgumentException("XML file entry '" + str +
" is BEAST 2 version, please use BEAST 3 !");
}

// All BEAST3 parameter types serialize without explicit bounds.
// The non-greedy prefix (.*?) ensures the optional {shape} group is captured
// for vector types (e.g. "freqs{4}: 0.25 ..."), and the non-greedy suffix (.*?)
// lets the trailing \s* absorb any whitespace the vector loop appends.
// Format: id{shape}: value(s) — {shape} is absent for scalars.
// str is the full paramToString() output; the ID prefix is NOT pre-stripped.
// Examples: "hky.kappa: 2.5" or "freqParameter.s:primate{4}: 0.25 0.25 0.25 0.25"
//
// Segment 1 — ^.*?
// Non-greedy wildcard that skips the parameter ID. Non-greedy is required
// because the ID may itself contain colons (e.g. "freqParameter.s:primate"),
// so greedy .* would overshoot past the shape token {N}.
//
// Segment 2 — (?:\{(\d+|\[\d+,\s*\d+\])\})?
// The whole segment is optional (?:...)? — absent for scalar parameters.
// \{ \} literal braces that wrap the shape token
// (...) capturing group(1): the shape token itself, two alternatives:
// \d+ vector: one or more digits, e.g. "4" → matches {4}
// |
// \[\d+,\s*\d+\] matrix: literal "[", digits (rows), comma, optional
// whitespace \s*, digits (cols), literal "]"
// e.g. "[2,3]" or "[2, 3]" → matches {[2,3]}
// group(1) is null when the segment is absent (scalar).
//
// Segment 3 — :(?=[^:]*$)\s*(.*?)\s*$
// : literal colon — the key-value separator
// (?=[^:]*$) lookahead: [^:]* matches zero or more non-colon characters,
// anchored to $ (end of string). This asserts that no further
// colon exists after this one, so we always match the LAST colon
// even when the ID contains colons.
// \s* skips optional whitespace between the colon and the value
// (.*?) capturing group(2): the value string (non-greedy, so the
// trailing \s* below can absorb whitespace rather than group(2))
// \s*$ absorbs trailing whitespace (e.g. the space paramToString()
// appends after each vector element) without including it in group(2)
Pattern noboundPattern = Pattern.compile("^.*?" +
"\\{(\\d+),?\\d?\\s*\\}" +
":\\s*(.*?)\\s*$");
"(?:\\{(\\d+|\\[\\d+,\\s*\\d+\\])\\})?" +
":(?=[^:]*$)\\s*(.*?)\\s*$");
Matcher matcher = noboundPattern.matcher(str);

Pattern scalarPattern = Pattern.compile("^.*?" +
":\\s+(.*?)\\s*$");
Matcher scalarMatcher = scalarPattern.matcher(str);

String shape = null; // null for scalars
String valuesAsString = null;
if (matcher.matches()) {
shape = matcher.group(1);
valuesAsString = matcher.group(2);
} else if (scalarMatcher.matches()) {
valuesAsString = scalarMatcher.group(1);
final String shape = matcher.group(1); // null for scalars
final String valuesAsString = matcher.group(2);
final String[] valuesStr = valuesAsString.split("\\s+");

if (param instanceof RealScalarParam<?> realScalarParam) {
realScalarParam.fromXML(shape, valuesStr);
} else if (param instanceof IntScalarParam<?> intScalarParam) {
intScalarParam.fromXML(shape, valuesStr);
} else if (param instanceof RealVectorParam<?> realVectorParam) {
realVectorParam.fromXML(shape, valuesStr);
} else if (param instanceof IntVectorParam<?> intVectorParam) {
intVectorParam.fromXML(shape, valuesStr);
} else if (param instanceof BoolScalarParam boolScalar) {
boolScalar.fromXML(valuesStr[0]);
} else if (param instanceof BoolVectorParam boolVector) {
boolVector.fromXML(valuesStr);
} else
throw new RuntimeException("Unknown parameter type : " + param.getClass().getName());
} else {
throw new RuntimeException("String could not be parsed to parameter : " + str);
}
String[] valuesStr = valuesAsString.split("\\s+");
if (param instanceof RealScalarParam<?> realScalarParam) {
realScalarParam.fromXML(shape, valuesStr);
} else if (param instanceof IntScalarParam<?> intScalarParam) {
intScalarParam.fromXML(shape, valuesStr);
} else if (param instanceof RealVectorParam<?> realVectorParam) {
realVectorParam.fromXML(shape, valuesStr);
} else if (param instanceof IntVectorParam<?> intVectorParam) {
intVectorParam.fromXML(shape, valuesStr);
} else if (param instanceof BoolScalarParam boolScalar) {
boolScalar.fromXML(valuesStr[0]);
} else if (param instanceof BoolVectorParam boolVector) {
boolVector.fromXML(valuesStr);
} else {
throw new RuntimeException("Unknown parameter type : " + param.getClass().getName());
}

}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,14 @@
import beast.base.spec.domain.Int;
import beast.base.spec.domain.PositiveReal;
import beast.base.spec.domain.Real;
import org.junit.jupiter.api.Nested;
import org.junit.jupiter.api.Test;
import org.w3c.dom.Document;
import org.w3c.dom.Element;

import javax.xml.parsers.DocumentBuilderFactory;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import static org.junit.jupiter.api.Assertions.*;

Expand Down Expand Up @@ -135,18 +138,16 @@ void testBoolVectorRoundTrip() throws Exception {
@Test
void testLegacyScalarBoundsThrows() throws Exception {
    // BEAST 2 state-file format: dimensions in [...] and explicit bounds in (...)
    // sit between the parameter ID and the values.
    String legacy = "kappa[1 1] (0.0,Infinity): 1.0";
    RealScalarParam<PositiveReal> param = new RealScalarParam<>(1.0, PositiveReal.INSTANCE);
    IllegalArgumentException ex = assertThrows(IllegalArgumentException.class,
            () -> ParameterUtils.parseParameter(createNode("kappa", legacy), param));
    // The message should tell the user that the entry comes from BEAST 2.
    assertTrue(ex.getMessage().contains("BEAST 2"));
}

@Test
void testLegacyVectorBoundsThrows() throws Exception {
// BEAST2 format: shape + explicit bounds
String legacy = "freqs{4, [0.0,1.0]}: 0.25 0.25 0.25 0.25";
String legacy = "hky.frequencies[4 1] (-Infinity,Infinity): 0.25, 0.25, 0.25, 0.25";
RealVectorParam<Real> param = new RealVectorParam<>(new double[]{0.25, 0.25, 0.25, 0.25}, Real.INSTANCE);
assertThrows(IllegalArgumentException.class,
() -> ParameterUtils.parseParameter(createNode("freqs", legacy), param));
Expand All @@ -171,4 +172,145 @@ void testParamToStringVectorHasShapeNoBoundsComma() {
assertTrue(s.contains("{2}"), "Vector toString must contain '{2}', got: " + s);
assertFalse(s.contains("{2,"), "Vector toString must not contain legacy '{2,' format, got: " + s);
}

// ------------------------------------------------------------------ noboundPattern regex

/**
 * White-box checks of the bound-free regex used inside {@link ParameterUtils#parseParameter}.
 * The pattern is copied into this class so that each part of it can be exercised on plain
 * strings, without constructing real StateNode objects.
 *
 * Pattern (unescaped):
 *   ^.*? (?:\{(\d+|\[\d+,\s*\d+\])\})? :(?=[^:]*$)\s*(.*?)\s*$
 *
 * group(1) — shape token: integer N (vector) or [r,c] (matrix); null for scalars
 * group(2) — value string without leading or trailing whitespace
 */
@Nested
class NoboundPatternTest {

    private static final Pattern PATTERN = Pattern.compile("^.*?" +
            "(?:\\{(\\d+|\\[\\d+,\\s*\\d+\\])\\})?" +
            ":(?=[^:]*$)\\s*(.*?)\\s*$");

    /**
     * Asserts that {@code input} matches the pattern and yields the expected groups.
     *
     * @param input          serialized parameter string
     * @param expectedShape  expected group(1), or {@code null} for a scalar
     * @param expectedValues expected group(2)
     */
    private void assertParsed(String input, String expectedShape, String expectedValues) {
        Matcher parsed = PATTERN.matcher(input);
        assertTrue(parsed.matches(), "Expected pattern to match: «" + input + "»");
        assertEquals(expectedShape, parsed.group(1));
        assertEquals(expectedValues, parsed.group(2));
    }

    // -- Segment 1: ^.*?  (the ID may contain colons; the last colon is the separator)

    @Test
    void scalarSimpleId() {
        // ID without any colon: no shape, value captured
        assertParsed("hky.kappa: 21.471014150629927", null, "21.471014150629927");
    }

    @Test
    void scalarIdWithEmbeddedColon() {
        // the colon inside the ID must not be taken as the separator
        assertParsed("freqParameter.s:primate: 0.25", null, "0.25");
    }

    @Test
    void booleanScalar() {
        assertParsed("isEstimated: true", null, "true");
    }

    @Test
    void scalarWithLeadingSpace() {
        // leading whitespace is swallowed by the ID prefix
        assertParsed(" kappa: 29", null, "29");
    }

    // -- Segment 2, first alternative: \d+  (vector size)

    @Test
    void vectorSize() {
        assertParsed("freqs{4}: 0.25 0.25 0.25 0.25", "4", "0.25 0.25 0.25 0.25");
    }

    @Test
    void vectorSizeWithColonInId() {
        // regression case: a greedy prefix would swallow "{4}" into the ID
        assertParsed("freqParameter.s:primate{4}: 0.2415671624255229 0.25 0.25 0.2584328375744771",
                "4", "0.2415671624255229 0.25 0.25 0.2584328375744771");
    }

    @Test
    void booleanVector() {
        assertParsed("isSelected{3}: true false true", "3", "true false true");
    }

    @Test
    void singleElementVector() {
        assertParsed("x{1}: 0.5", "1", "0.5");
    }

    // -- Segment 2, second alternative: \[\d+,\s*\d+\]  (matrix shape)
    // TODO: not supported yet; enable these once matrix parameters are handled.
    // @Test
    // void matrixShapeNoSpace() {
    //     assertParsed("rates{[2,3]}: 1.0 2.0 3.0 4.0 5.0 6.0", "[2,3]", "1.0 2.0 3.0 4.0 5.0 6.0");
    // }
    //
    // @Test
    // void matrixShapeWithSpaceAfterComma() {
    //     // whitespace is accepted only directly after the comma, i.e. "[2, 3]"
    //     assertParsed("rates{[2, 3]}: 1.0 2.0 3.0 4.0 5.0 6.0", "[2, 3]", "1.0 2.0 3.0 4.0 5.0 6.0");
    // }

    // -- Segment 3: :(?=[^:]*$)\s*(.*?)\s*$  (separator colon and value trimming)

    @Test
    void trailingSpaceAbsorbed() {
        // a trailing space after the last element must not end up in group(2)
        assertParsed("freqs{4}: 0.25 0.25 0.25 0.25 ", "4", "0.25 0.25 0.25 0.25");
    }

    @Test
    void extraWhitespaceAfterColon() {
        // whitespace between the colon and the value is not part of group(2)
        assertParsed("kappa: 1.5", null, "1.5");
    }

    @Test
    void lastColonChosenWhenMultiplePresent() {
        // three colons in the input; only the last one separates ID and value
        assertParsed("a:b:c: 99", null, "99");
    }

    // -- Inputs that must be rejected

    @Test
    void noColonDoesNotMatch() {
        assertFalse(PATTERN.matcher("kappaNoColon").matches());
    }
}
}
24 changes: 2 additions & 22 deletions beast-base/src/test/java/beast/base/spec/type/ScalarTest.java
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
package beast.base.spec.type;

import beast.base.spec.domain.Int;
import org.junit.Test;
import org.junit.jupiter.api.Test;

import static org.junit.Assert.*;
import static org.junit.jupiter.api.Assertions.*;

/**
* Test class for the Scalar interface
Expand All @@ -25,28 +25,8 @@ public Int getDomain() {
return domain;
}

@Override
public int rank() {
return 0; // Scalar has rank 0
}

@Override
public int[] shape() {
return new int[] {}; // Scalar has no shape
}

/**
 * Returns the stored {@code value}.
 *
 * @return the current content of the {@code value} field
 */
public Integer getValue() {
return value;
}

@Override
public boolean isValid(Integer value) {
return domain.isValid(value); // Validate against domain constraints
}

@Override
public Integer get(int... idx) {
// TODO Auto-generated method stub
return null;
}
}
Expand Down
Loading