From c8997a9b9db3c662732e5f38d78fa63e771b0715 Mon Sep 17 00:00:00 2001 From: Clarmy Lee Date: Thu, 16 Oct 2025 18:37:22 +0800 Subject: [PATCH 1/2] feat: Enhance METAR validation and parsing functionality - Updated regex patterns in `validate_metar` to improve accuracy in parsing METAR messages, including handling of RMK remarks and isolated digits. - Added comprehensive test cases to validate various METAR formats, including edge cases for wind, visibility, and temperature. - Improved weather description handling in parsing to ensure correct word order. - Introduced additional validation rules for suspicious fields and isolated values in METAR reports. --- pymetaf/parser.py | 559 ++++++++++++++++++++-------- tests/case/metar/parse_text_case.py | 101 ++++- tests/test_validation.py | 447 ++++++++++++++++++---- 3 files changed, 885 insertions(+), 222 deletions(-) diff --git a/pymetaf/parser.py b/pymetaf/parser.py index 64d5811..309bb5d 100644 --- a/pymetaf/parser.py +++ b/pymetaf/parser.py @@ -49,8 +49,8 @@ "trend": r"(TEMPO|BECMG|NOSIG).*?(?= TEMPO| BECMG| NOSIG|=)", # Change start and end time "vartime": r"(FM|TL|AT)\d{4}", - # Current observation - "observ": r"(METAR|SPECI|TAF).*?(?= TEMPO| BECMG| NOSIG)", + # Current observation (stop before TEMPO/BECMG/NOSIG/RMK to avoid parsing remarks) + "observ": r"(METAR|SPECI|TAF).*?(?= TEMPO| BECMG| NOSIG| RMK)", # Forecast valid time "validtime": r"\b\d{6}\b", # Forecast cancellation indicator @@ -66,18 +66,18 @@ def validate_metar(text, strict_mode=False): """Validate METAR message format - + Args: text (str): The original METAR message text to validate strict_mode (bool): Whether to use strict mode. True: RMK remarks section not allowed False: Allow RMK remarks but check their validity - + Returns: tuple: (is_valid, error_message) is_valid (bool): Whether the message is valid error_message (str): Error message if invalid; None if valid - + Examples: >>> validate_metar("METAR ZBAA 311400Z 01002MPS CAVOK 14/12 Q1009 NOSIG=") (True, None) @@ -86,76 +86,119 @@ def validate_metar(text, strict_mode=False): """ if not text or not isinstance(text, str): return False, "Empty or invalid input" - + # Remove possible trailing equals sign text_clean = text.rstrip("=").strip() - + # Check for line breaks (should be single line) - if '\n' in text_clean or '\r' in text_clean: + if "\n" in text_clean or "\r" in text_clean: return False, "Contains line breaks (should be single line)" - + # Check for MK spelling error (should be RMK) - if re.search(r'\sMK\s', text_clean): + if re.search(r"\sMK\s", text_clean): return False, "Spelling error: MK (should be RMK)" - + + # Check for isolated single digit at the end (before final cleanup) + # e.g. "NOSIG 9 =" or "Q1017 3 =" + # This check must be done before TREND separation + parts_preliminary = text_clean.split() + if len(parts_preliminary) >= 1: + last_preliminary = parts_preliminary[-1] + if re.match(r"^\d$", last_preliminary): + return False, f"Isolated digit at ending: {last_preliminary}" + # Check if there is RMK remarks section # IMPORTANT: RMK must come AFTER TREND section, not before - has_rmk = 'RMK' in text_clean + has_rmk = "RMK" in text_clean if has_rmk: # Separate content before and after RMK - parts_split = text_clean.split('RMK', 1) + parts_split = text_clean.split("RMK", 1) main_part = parts_split[0].strip() rmk_part = parts_split[1].strip() if len(parts_split) > 1 else "" - + # In strict mode, RMK section is not allowed if strict_mode: return False, "RMK remarks section not allowed in strict mode" - + # Check if TREND keywords (BECMG/TEMPO) appear in RMK section # This is a position error - TREND must come before RMK - for keyword in ['BECMG', 'TEMPO']: + for keyword in ["BECMG", "TEMPO"]: if keyword in rmk_part: - return False, f"TREND keyword {keyword} found in RMK section (should be before RMK)" - + return ( + False, + f"TREND keyword {keyword} found in RMK section (should be before RMK)", + ) + # RMK is free text remarks - no other content validation # Just keep it as is for downstream processing else: main_part = text_clean - + # Check main part for invalid special characters (allowed: letters, numbers, space, /, +, -) - invalid_chars = re.findall(r'[^A-Za-z0-9\s/+\-]', main_part) + invalid_chars = re.findall(r"[^A-Za-z0-9\s/+\-]", main_part) if invalid_chars: return False, f"Contains invalid characters: {set(invalid_chars)}" - + # Check common spelling errors (use word boundaries to avoid false positives) - if re.search(r'\bEMPO\b', main_part): # Should be TEMPO + if re.search(r"\bEMPO\b", main_part): # Should be TEMPO return False, "Spelling error: EMPO (should be TEMPO)" - if re.search(r'\bTRMPO\b', main_part): # Should be TEMPO + if re.search(r"\bTRMPO\b", main_part): # Should be TEMPO return False, "Spelling error: TRMPO (should be TEMPO)" - if re.search(r'\bECMG\b', main_part): # Should be BECMG + if re.search(r"\bECMG\b", main_part): # Should be BECMG return False, "Spelling error: ECMG (should be BECMG)" - if re.search(r'\bBCECMG\b', main_part): # Should be BECMG + if re.search(r"\bBCECMG\b", main_part): # Should be BECMG return False, "Spelling error: BCECMG (should be BECMG)" - + + # Check for double letter errors in NOSIG + if re.search(r"\bNNOSIG\b", main_part): # Should be NOSIG + return False, "Spelling error: NNOSIG (should be NOSIG)" + if re.search(r"\bNOSSIG\b", main_part): # Should be NOSIG + return False, "Spelling error: NOSSIG (should be NOSIG)" + + # Check for NOSIG spelling variations + if re.search(r"\bNOAISIG\b", main_part): # NOAI instead of NOSI + return False, "Spelling error: NOAISIG (should be NOSIG)" + if re.search(r"\bNOSZ\b", main_part): # NOSZ instead of NOSIG + return False, "Spelling error: NOSZ (should be NOSIG)" + # Check for various BECMG spelling errors becmg_errors = [ - 'BCNG', 'BECNG', 'BCEMG', 'BECML', 'BECMFG', 'BECMGG', 'BECMGA', 'BGECMG', - 'BECGG', 'BEEMG', 'BEMG', 'MECMG', 'BECMF', 'BECMGM' + "BCNG", + "BECNG", + "BCEMG", + "BECML", + "BECMFG", + "BECMGG", + "BECMGA", + "BGECMG", + "BECGG", + "BEEMG", + "BEMG", + "MECMG", + "BECMF", + "BECMGM", ] for error in becmg_errors: - if re.search(r'\b' + error + r'\b', main_part): + if re.search(r"\b" + error + r"\b", main_part): return False, f"Spelling error: {error} (should be BECMG)" - + + # Check for BECMG/TEMPO stuck with time indicators + # BECMGTL0130, TEMPOFM0500, etc. + if re.search(r"\bBECMG(FM|TL|AT)\d{4}\b", main_part): + return False, "BECMG stuck with time indicator (missing space)" + if re.search(r"\bTEMPO(FM|TL|AT)\d{4}\b", main_part): + return False, "TEMPO stuck with time indicator (missing space)" + # Check for placeholders - if re.search(r'Q{5,}', main_part): # QQQQQQQQ... + if re.search(r"Q{5,}", main_part): # QQQQQQQQ... return False, "Contains placeholder (repeated Q)" - + # Separate TREND section (NOSIG/BECMG/TEMPO) from main observation # TREND is at the end and contains change forecasts - trend_keywords = ['NOSIG', 'BECMG', 'TEMPO'] + trend_keywords = ["NOSIG", "BECMG", "TEMPO"] has_trend = False trend_start_idx = -1 - + for keyword in trend_keywords: if keyword in main_part: has_trend = True @@ -167,34 +210,34 @@ def validate_metar(text, strict_mode=False): break if trend_start_idx > 0: break - + # Separate main observation and trend parts if has_trend and trend_start_idx > 0: parts_all = main_part.split() main_obs_parts = parts_all[:trend_start_idx] trend_parts = parts_all[trend_start_idx:] - main_obs_text = ' '.join(main_obs_parts) + main_obs_text = " ".join(main_obs_parts) else: main_obs_text = main_part trend_parts = [] - + # Check minimum message length if len(main_obs_text) < 20: return False, "METAR text too short" - + parts = main_obs_text.split() if len(parts) < 4: return False, "Missing essential fields" - + # Locate field indices idx = 0 - + # 1. Check report type (first field should be METAR/SPECI/TAF) # If first field looks like ICAO code, report type is missing - icao_pattern = re.compile(r'^[A-Z]{4}$') + icao_pattern = re.compile(r"^[A-Z]{4}$") if icao_pattern.match(parts[idx]): return False, f"Missing report type (METAR/SPECI): starts with {parts[idx]}" - + if parts[idx] in ["METAR", "SPECI", "TAF"]: idx += 1 # Check for COR @@ -202,54 +245,73 @@ def validate_metar(text, strict_mode=False): idx += 1 else: return False, f"Invalid or missing report type: {parts[idx]}" - + # 2. Check ICAO code (must be 4 uppercase letters) if idx >= len(parts): return False, "Missing ICAO code" - + if not icao_pattern.match(parts[idx]): return False, f"Invalid ICAO code format: {parts[idx]}" idx += 1 - + # 3. Check time group (must be 6 digits + Z, day part cannot exceed 31) if idx >= len(parts): return False, "Missing time group" - - time_pattern = re.compile(r'^(\d{2})(\d{4})Z$') + + time_pattern = re.compile(r"^(\d{2})(\d{4})Z$") time_match = time_pattern.match(parts[idx]) if not time_match: return False, f"Invalid time format: {parts[idx]}" - + day = int(time_match.group(1)) if day < 1 or day > 31: return False, f"Invalid day in time group: {day}" idx += 1 - + # If it's a NIL report, we're done here if idx < len(parts) and parts[idx] == "NIL": return True, None - + # Check for AUTO if idx < len(parts) and parts[idx] == "AUTO": idx += 1 - + # 4. Check wind group (may exist, check format) if idx < len(parts): - wind_pattern = re.compile(r'^((\d{3}|VRB)\d{2}(G\d{2})?(MPS|KT)|/{5}(MPS|KT))$') + wind_pattern = re.compile(r"^((\d{3}|VRB)\d{2}(G\d{2})?(MPS|KT)|/{5}(MPS|KT))$") # Check for wind-like fields with incorrect format - wind_like_pattern = re.compile(r'^\d{1,5}(MPS|KT|PS)$') + wind_like_pattern = re.compile(r"^\d{1,5}(MPS|KT|PS)$") # Check for spacing errors like "12001MPSH4000" or "30007MPSG13" - wind_spacing_error = re.compile(r'^\d{5}MPS[A-Z]|\d{5}MPSG\d+$') + wind_spacing_error = re.compile(r"^\d{5}MPS[A-Z]|\d{5}MPSG\d+$") # Check for wind variation concatenation errors like "18003MPSV220" - wind_var_error = re.compile(r'^\d{5}MPSV\d+$') - + wind_var_error = re.compile(r"^\d{5}MPSV\d+$") + # Check for invalid wind patterns like 1800C, 41MPS (wrong number of digits) + invalid_wind_pattern = re.compile(r"^\d{4}[A-Z]$|^\d{1,2}(MPS|KT)$") + # Check for incomplete gust: 000G, 12003G, etc. (missing gust value) + incomplete_gust_pattern = re.compile(r"^\d{3,5}G$") + # Check for wrong wind units: UKT, M, etc. + wrong_unit_pattern = re.compile(r"^[A-Z]{2,4}$") # 2-4 letters, not valid unit + if wind_pattern.match(parts[idx]): idx += 1 # Check for possible wind direction variation if idx < len(parts): - wind_var_pattern = re.compile(r'^\d{3}V\d{3}$') + wind_var_pattern = re.compile(r"^\d{3}V\d{3}$") if wind_var_pattern.match(parts[idx]): idx += 1 + elif invalid_wind_pattern.match(parts[idx]): + # Invalid wind format: 4 digits + letter, or 1-2 digits + unit + return False, f"Invalid wind format: {parts[idx]}" + elif incomplete_gust_pattern.match(parts[idx]): + # Incomplete gust: has G but no gust value + # Check if next part is a wrong unit + if idx + 1 < len(parts) and wrong_unit_pattern.match(parts[idx + 1]): + return ( + False, + f"Invalid wind format: {parts[idx]} {parts[idx + 1]} (incomplete gust with wrong unit)", + ) + else: + return False, f"Invalid wind format: {parts[idx]} (incomplete gust)" elif wind_like_pattern.match(parts[idx]): # Looks like wind group but format is wrong return False, f"Invalid wind format: {parts[idx]}" @@ -259,22 +321,26 @@ def validate_metar(text, strict_mode=False): elif wind_var_error.match(parts[idx]): # Wind variation information concatenated return False, f"Wind variation spacing error: {parts[idx]}" - + # Check if current part looks like incomplete wind unit + elif re.match(r"^\d{5}M$", parts[idx]): + # e.g., 01006M (missing PS from MPS) + return False, f"Invalid wind format: {parts[idx]} (incomplete unit)" + # 5. Check pressure group (if exists, must be Q or A followed by 4 digits or ////) # Search for pressure group anywhere in the message qnh_found = False - qnh_pattern = re.compile(r'^[AQ]\d{4}$') - qnh_missing_pattern = re.compile(r'^[AQ]/{4}$') # Q//// or A//// means missing data - + qnh_pattern = re.compile(r"^[AQ]\d{4}$") + qnh_missing_pattern = re.compile(r"^[AQ]/{4}$") # Q//// or A//// means missing data + # Known keywords that start with A or Q and are not QNH - known_keywords = ['AUTO', 'AT'] # AT is for TREND time indicator like AT1600 - + known_keywords = ["AUTO", "AT"] # AT is for TREND time indicator like AT1600 + for part in parts: - if part.startswith('Q') or part.startswith('A'): + if part.startswith("Q") or part.startswith("A"): # Skip known keywords - if part in known_keywords or part.startswith('AT') and len(part) == 6: + if part in known_keywords or part.startswith("AT") and len(part) == 6: continue - + if qnh_pattern.match(part): qnh_found = True break @@ -285,128 +351,259 @@ def validate_metar(text, strict_mode=False): else: # If starts with Q or A but format is wrong, this is an error return False, f"Invalid QNH format: {part}" - + # 6. Check for abnormal character combinations at end # End should be NOSIG, TEMPO, BECMG or other valid fields last_part = parts[-1] - + # If last field is a valid ending field, skip check - valid_endings = ['NOSIG', 'TEMPO', 'BECMG', 'NIL'] - - # Check for spacing errors like "NOSI G" + valid_endings = ["NOSIG", "TEMPO", "BECMG", "NIL"] + + # Check for spacing errors like "NOSI G" and spelling errors like "NOAI SIG" if len(parts) >= 2: last_two_combined = parts[-2] + parts[-1] - if last_two_combined in ['NOSIG', 'TEMPO', 'BECMG']: + + # Direct match for valid keywords that were split + if last_two_combined in ["NOSIG", "TEMPO", "BECMG"]: return False, f"Invalid spacing in ending: {parts[-2]} {parts[-1]}" - + + # Check for NOSIG spelling variations + # NOAI SIG, NOSI G, NO SIG, etc. + nosig_like_patterns = [ + "NOAISIG", + "NOAI SIG", # NOAI instead of NOSI + "NOIG", + "NOSI G", + "NO SIG", + ] + + combined_with_space = parts[-2] + " " + parts[-1] + if ( + last_two_combined in nosig_like_patterns + or combined_with_space in nosig_like_patterns + ): + return ( + False, + f"Invalid spacing/spelling in ending: {parts[-2]} {parts[-1]} (should be NOSIG)", + ) + # Check for single letter ending (without RMK, this is usually an error) # e.g. "Q1003 N=" or "FEW015 S=" - if not has_rmk and re.match(r'^[A-Z]$', last_part): + if not has_rmk and re.match(r"^[A-Z]$", last_part): return False, f"Invalid single letter ending: {last_part}" - + if last_part not in valid_endings: # Check if last field contains abnormal combinations invalid_endings = [ - r'^NOSIT$', # NOSIG spelling error - r'^NOSI$', # NOSI (NOSIG missing G) - r'^OSIG$', # Missing N - r'^DUPE$', # Duplicate report marker, should not appear + r"^NOSIT$", # NOSIG spelling error + r"^NOSI$", # NOSI (NOSIG missing G) + r"^OSIG$", # Missing N + r"^DUPE$", # Duplicate report marker, should not appear ] - + for pattern in invalid_endings: if re.search(pattern, last_part): return False, f"Invalid ending: {last_part}" - - # 7. Check for isolated single digits or letters (only in main observation, not in TREND) + + # 7. Check for suspicious short fields (1-2 characters) + # Pattern: single digit + single letter (e.g., 0K, 1A, 9Z) + for part in parts: + # Check for digit+letter pattern (usually invalid) + if re.match(r"^\d[A-Z]$", part): + # Exception: temperature like M2 is valid in some contexts + # But 0K, 1A, etc. are suspicious + return False, f"Suspicious field: {part}" + + # Check for temperature with + prefix (invalid format) + # Temperature should be M?\d{2}/M?\d{2}, not +\d+/... + if "/" in part and part.startswith("+"): + return ( + False, + f"Invalid temperature format: {part} (+ not allowed in temperature)", + ) + + # Check for temperature with insufficient digits (must be 2 digits each side) + # Valid: 20/10, M04/M10, 02/M12 + # Invalid: 0/10, 5/M3 + if "/" in part and not part.startswith("+"): + temp_pattern = re.compile(r"^M?\d{2}/M?\d{2}$") + if not temp_pattern.match(part): + # Check if it looks like temperature but format is wrong + if re.match(r"^M?\d+/M?\d+$", part): + return ( + False, + f"Invalid temperature format: {part} (must be 2 digits each side)", + ) + + # Check for invalid visibility format + # Valid: 4 digits (9999, 0200), or digit+SM (10SM) + # Invalid: 5+ digits followed by letter(s) like 60008P + # But exclude: time group (6 digits + Z), wind (5 digits + MPS/KT) + if re.match(r"^\d{5,}[A-Z]+$", part): + # Exclude time group + if re.match(r"^\d{6}Z$", part): + continue + # Exclude wind group + if re.match(r"^\d{5}(MPS|KT)$", part): + continue + # This is suspicious + return False, f"Invalid visibility format: {part}" + + # 7. Check if message contains at least one basic observation element + # A valid METAR should have at least one of: visibility, weather, cloud, temperature, QNH + # After METAR/SPECI, ICAO, time, and possibly wind, there should be more fields + # If only 4 fields (METAR ICAO TIME WIND), it's too short + if len(parts) <= 4: + return False, "Missing observation data (visibility/cloud/temperature/QNH)" + + # Check if it has at least one valid observation field + has_observation = False + for part in parts[3:]: # Skip METAR, ICAO, TIME + if ( + re.match(r"^\d{4}$", part) # Visibility + or re.match(r"^(FEW|SCT|BKN|OVC)\d{3}", part) # Cloud + or part in ["SKC", "NSC", "CAVOK", "CLR"] # Sky condition + or re.match(r"^M?\d{2}/M?\d{2}$", part) # Temperature + or re.match(r"^[AQ]\d{4}$", part) + ): # QNH + has_observation = True + break + + if not has_observation: + return False, "Missing observation data (visibility/cloud/temperature/QNH)" + + # 8. Check for isolated single digits or letters (only in main observation, not in TREND) for i, part in enumerate(parts): # Skip known valid single letter/digit cases - if part in ['M', 'P', 'U', 'D', 'N']: # These are valid in certain contexts + if part in ["M", "P", "U", "D", "N"]: # These are valid in certain contexts # Check context, if they are isolated (neither prev nor next are appropriate), report error if i > 0 and i < len(parts) - 1: # Check if in reasonable context - prev_part = parts[i-1] - next_part = parts[i+1] + prev_part = parts[i - 1] + next_part = parts[i + 1] # If neither prev nor next is digit or RVR related, may be abnormal - if not (prev_part.startswith('R') or next_part.isdigit()): + if not (prev_part.startswith("R") or next_part.isdigit()): return False, f"Isolated character: {part}" - + # Check for isolated single digit if part.isdigit() and len(part) == 1: return False, f"Isolated digit: {part}" - + # 8. Check for obviously wrong fields in main observation (not in TREND) # TREND may contain time indicators like TL1440, FM1520, AT1600 which are valid for part in parts[idx:]: # Skip known valid formats - if (qnh_pattern.match(part) or - re.match(r'^\d{4}$', part) or # 4 digits (visibility) - re.match(r'^[A-Z]+$', part) or - re.match(r'^M?\d+/M?\d+$', part) or # Temperature/dewpoint - re.match(r'^R\d+', part) or # RVR - re.match(r'^\d{3}V\d{3}$', part) or # Wind direction variation - re.match(r'^(FEW|SCT|BKN|OVC|SKC|NSC)', part) or # Cloud group - re.match(r'^VV\d{3}$', part) or # Vertical visibility - re.match(r'^[/]+$', part)): # Slashes (indicate missing data) + if ( + qnh_pattern.match(part) + or re.match(r"^\d{4}$", part) # 4 digits (visibility) + or re.match(r"^[A-Z]+$", part) + or re.match(r"^M?\d+/M?\d+$", part) # Temperature/dewpoint + or re.match(r"^R\d+", part) # RVR + or re.match(r"^\d{3}V\d{3}$", part) # Wind direction variation + or re.match( + r"^(FEW|SCT|BKN|OVC)\d{3}(?:TCU|CB)?$", part + ) # Cloud group with correct format (3 digits) + or part in ["SKC", "NSC", "CAVOK"] # Sky clear formats + or re.match(r"^VV\d{3}$", part) # Vertical visibility + or re.match(r"^[/]+$", part) + ): # Slashes (indicate missing data) continue - + # Check for isolated 2 or 3 digit numbers (not visibility or other valid formats) - if re.match(r'^\d{2,3}$', part): + if re.match(r"^\d{2,3}$", part): # Check if in reasonable context # If not preceded by R (RVR), may be abnormal return False, f"Isolated numeric value: {part}" - + # Check for FM/TL/AT time indicators without BECMG/TEMPO # These indicate TREND section which must have BECMG or TEMPO first - if re.match(r'^(FM|TL|AT)\d{4}$', part): + if re.match(r"^(FM|TL|AT)\d{4}$", part): # Check if there's a BECMG spelling error in the previous parts # Common BECMG spelling errors becmg_error_patterns = [ - 'BCNG', 'BECNG', 'BCEMG', 'BECML', 'BECMFG', 'BECMGG', 'BECMGA', 'BGECMG', - 'BECGG', 'BEEMG', 'BEMG', 'MECMG', 'BECMF', 'BECMGM', 'ECMG', 'BCECMG' + "BCNG", + "BECNG", + "BCEMG", + "BECML", + "BECMFG", + "BECMGG", + "BECMGA", + "BGECMG", + "BECGG", + "BEEMG", + "BEMG", + "MECMG", + "BECMF", + "BECMGM", + "ECMG", + "BCECMG", ] # Check last few parts for BECMG spelling errors check_range = min(5, len(parts)) for j in range(max(0, i - check_range), i): if parts[j] in becmg_error_patterns: return False, f"Spelling error: {parts[j]} (should be BECMG)" - + # If no spelling error found, report time indicator error return False, f"Time indicator {part[:2]} must follow BECMG or TEMPO" - - # Check for wrong cloud group format (e.g. KN026 should be BKN026) - cloud_like_pattern = re.compile(r'^[A-Z]{2,3}\d{3}') + + # Check for wrong cloud group format (e.g. KN026 should be BKN026, BKN0 height too short) + cloud_like_pattern = re.compile(r"^[A-Z]{2,3}\d{1,3}") if cloud_like_pattern.match(part): - valid_cloud_types = ['FEW', 'SCT', 'BKN', 'OVC', 'SKC', 'NSC', 'VV'] - if not any(part.startswith(ct) for ct in valid_cloud_types): + valid_cloud_types = ["FEW", "SCT", "BKN", "OVC", "SKC", "NSC", "VV"] + # Check if starts with valid type + if any(part.startswith(ct) for ct in valid_cloud_types): + # Check if height is 3 digits (except SKC/NSC which don't have height) + if part not in ["SKC", "NSC"]: + # Extract digits after cloud type + for ct in valid_cloud_types: + if part.startswith(ct): + remaining = part[len(ct) :] + # Check if remaining part has digits + digits_match = re.match(r"^(\d+)", remaining) + if digits_match: + digits = digits_match.group(1) + if len(digits) != 3: + return ( + False, + f"Invalid cloud height: {part} (height must be 3 digits)", + ) + break + else: return False, f"Invalid cloud group format: {part}" - + + # Check for known invalid fields + invalid_fields = ["DUPE", "TEST", "ERROR", "INVALID", "DELETE", "CANCEL"] + if part in invalid_fields: + return False, f"Invalid field: {part}" + # Check for obviously abnormal mixed fields # Examples: OCCGCRY, QUXQQ, DEPPQMPS, etc. # But exclude valid weather phenomenon codes (can be long, e.g. -FZDZSN, -TSRASN) - if len(part) > 6 and re.search(r'[A-Z]{6,}', part): + if len(part) > 6 and re.search(r"[A-Z]{6,}", part): # Check if it's a known valid field - known_fields = ['NOSIG', 'CAVOK', 'BECMG', 'TEMPO'] + known_fields = ["NOSIG", "CAVOK", "BECMG", "TEMPO"] if part in known_fields or any(kf in part for kf in known_fields): continue - + # Check if it's a weather phenomenon code # Pattern: [+-]?(VC|RE)?(MI|BC|PR|DR|BL|SH|TS|FZ)?(DZ|RA|SN|SG|IC|PL|GR|GS)+(BR|FG|FU|VA|DU|SA|HZ)?(PO|SQ|FC|SS|DS)? weather_pattern = re.compile( - r'^[+-]?' # Intensity - r'(VC|RE)?' # Vicinity/Recent - r'(MI|BC|PR|DR|BL|SH|TS|FZ)?' # Descriptor - r'(DZ|RA|SN|SG|IC|PL|GR|GS)+' # Precipitation (one or more) - r'(BR|FG|FU|VA|DU|SA|HZ)?' # Obscuration - r'(PO|SQ|FC|SS|DS)?$' # Other + r"^[+-]?" # Intensity + r"(VC|RE)?" # Vicinity/Recent + r"(MI|BC|PR|DR|BL|SH|TS|FZ)?" # Descriptor + r"(DZ|RA|SN|SG|IC|PL|GR|GS)+" # Precipitation (one or more) + r"(BR|FG|FU|VA|DU|SA|HZ)?" # Obscuration + r"(PO|SQ|FC|SS|DS)?$" # Other ) - + if weather_pattern.match(part): continue - + # May be abnormal field - if not re.match(r'^[A-Z]{4}$', part): # Not ICAO code + if not re.match(r"^[A-Z]{4}$", part): # Not ICAO code return False, f"Suspicious field: {part}" - + # 9. Validate TREND section if present if has_trend and trend_parts: # Check structure: time indicators (FM/TL/AT) must follow BECMG/TEMPO @@ -414,51 +611,87 @@ def validate_metar(text, strict_mode=False): prev_keyword = None for i, part in enumerate(trend_parts): # Track change type keywords - if part in ['BECMG', 'TEMPO']: + if part in ["BECMG", "TEMPO"]: prev_keyword = part continue - + # Time indicators must follow a change type keyword - if re.match(r'^(FM|TL|AT)\d{4}$', part): + if re.match(r"^(FM|TL|AT)\d{4}$", part): if prev_keyword is None: # FM/TL/AT without preceding BECMG/TEMPO is invalid return False, f"Time indicator {part} without BECMG/TEMPO" continue - + # NOSIG stands alone, doesn't need validation - if part == 'NOSIG': + if part == "NOSIG": continue - + # Skip valid TREND elements (wind, visibility, weather, clouds, NSW, CAVOK) - if (re.match(r'^(VRB|\d{3})\d{2}(G\d{2})?(MPS|KT)$', part) or # Wind - re.match(r'^\d{4}$', part) or # Visibility - re.match(r'^(FEW|SCT|BKN|OVC|SKC|NSC)', part) or # Clouds - part in ['NSW', 'CAVOK'] or # No significant weather / CAVOK - re.match(r'^[+-]?(VC|RE)?(MI|BC|PR|DR|BL|SH|TS|FZ)?(DZ|RA|SN|SG|IC|PL|GR|GS)?(BR|FG|FU|VA|DU|SA|HZ)?(PO|SQ|FC|SS|DS)?$', part)): # Weather + if ( + re.match(r"^(VRB|\d{3})\d{2}(G\d{2})?(MPS|KT)$", part) # Wind + or re.match(r"^\d{4}$", part) # Visibility + or re.match(r"^(FEW|SCT|BKN|OVC|SKC|NSC)", part) # Clouds + or part in ["NSW", "CAVOK"] # No significant weather / CAVOK + or re.match( + r"^[+-]?(VC|RE)?(MI|BC|PR|DR|BL|SH|TS|FZ)?(DZ|RA|SN|SG|IC|PL|GR|GS)?(BR|FG|FU|VA|DU|SA|HZ)?(PO|SQ|FC|SS|DS)?$", + part, + ) + ): # Weather continue - + # Prohibited in TREND: RVR, QNH, temperature, wind shear - if re.match(r'^R\d{2}', part): # RVR + if re.match(r"^R\d{2}", part): # RVR return False, f"RVR not allowed in TREND: {part}" - if re.match(r'^[AQ]\d{4}$', part): # QNH + if re.match(r"^[AQ]\d{4}$", part): # QNH return False, f"QNH not allowed in TREND: {part}" - if re.match(r'^M?\d{2}/M?\d{2}$', part): # Temperature/dewpoint + if re.match(r"^M?\d{2}/M?\d{2}$", part): # Temperature/dewpoint return False, f"Temperature not allowed in TREND: {part}" - if part.startswith('WS'): # Wind shear + if part.startswith("WS"): # Wind shear return False, f"Wind shear not allowed in TREND: {part}" - if re.match(r'^PROB\d{2}$', part): # Probability (TAF only) + if re.match(r"^PROB\d{2}$", part): # Probability (TAF only) return False, f"Probability group not allowed in TREND: {part}" - + + # Any other field in TREND that doesn't match valid patterns is suspicious + # Reject obviously invalid fields (e.g., K, JHHHHH, CHECK, TEXT) + # Single letters (except valid abbreviations) + if len(part) == 1: + return False, f"Suspicious field in TREND: {part}" + + # Long strings of same letter (e.g., JHHHHH, AAAAA) + if len(part) >= 5 and len(set(part)) <= 2: + return False, f"Suspicious field in TREND: {part}" + + # English words that shouldn't appear (CHECK, TEXT, NEW, ENDING, ADDED, DUPE) + suspicious_words = [ + "CHECK", + "TEXT", + "NEW", + "ENDING", + "ADDED", + "ERROR", + "INVALID", + "DUPE", + "TEST", + "DELETE", + "CANCEL", + ] + if part in suspicious_words: + return False, f"Suspicious field in TREND: {part}" + + # Long mixed letter fields (e.g., ZBBBXMXX) + if len(part) > 6 and not re.match(r"^\d+$", part): + return False, f"Suspicious field in TREND: {part}" + # 10. Check for multiple isolated single letter fields at end (e.g. "TE G") # Only check main observation part, not TREND if len(parts) >= 2: - last_two = ' '.join(parts[-2:]) + last_two = " ".join(parts[-2:]) # Check if they are two isolated uppercase letters - if re.match(r'^[A-Z]{1,2}\s+[A-Z]{1,2}$', last_two): + if re.match(r"^[A-Z]{1,2}\s+[A-Z]{1,2}$", last_two): # This may be an abnormal ending if parts[-2] not in valid_endings and parts[-1] not in valid_endings: return False, f"Invalid ending: {last_two}" - + return True, None @@ -554,7 +787,7 @@ def get_weather_description(code): # Precipitation types that can follow descriptors like SH precipitation_types = ["DZ", "RA", "SN", "SG", "IC", "PL", "GR", "GS"] - + weather_codes = { "DZ": "Drizzle", "RA": "Rain", @@ -586,36 +819,58 @@ def get_weather_description(code): "PR": "Partial", "DR": "Low Drifting", "FZ": "Freezing", - "VC": "In the Vicinity", - "RE": "Recent", + "VC": "in the Vicinity", # Will be appended at the end + "RE": "Recent", # Will be prepended at the beginning } + # Check for vicinity (VC) or recent (RE) prefix + # These should be placed at specific positions in the final description + vicinity_suffix = "" + recent_prefix = "" + + if code.startswith("VC"): + vicinity_suffix = " in the Vicinity" + code = code[2:] + elif code.startswith("RE"): + recent_prefix = "Recent " + code = code[2:] + description = "" remaining_code = code while len(remaining_code) > 0: matched = False for key in weather_codes.keys(): + # Skip VC and RE as they are handled separately + if key in ["VC", "RE"]: + continue + if remaining_code.startswith(key): # Special handling for SH: add "of" only if followed by precipitation if key == "SH": # Check if next part is a precipitation type - rest_of_code = remaining_code[len(key):] - has_precipitation = any(rest_of_code.startswith(pt) for pt in precipitation_types) + rest_of_code = remaining_code[len(key) :] + has_precipitation = any( + rest_of_code.startswith(pt) for pt in precipitation_types + ) if has_precipitation: description += "Showers of " else: description += "Showers " else: description += weather_codes[key] + " " - remaining_code = remaining_code[len(key):] + remaining_code = remaining_code[len(key) :] matched = True break - + # If no match found, skip this character to avoid infinite loop if not matched: remaining_code = remaining_code[1:] - return intensity + description.strip() + # Assemble final description: recent_prefix + intensity + description + vicinity_suffix + final_description = ( + recent_prefix + intensity + description.strip() + vicinity_suffix + ) + return final_description.strip() def parse_text(text, year, month): diff --git a/tests/case/metar/parse_text_case.py b/tests/case/metar/parse_text_case.py index 49af16f..f463734 100644 --- a/tests/case/metar/parse_text_case.py +++ b/tests/case/metar/parse_text_case.py @@ -629,7 +629,7 @@ "cloud_type": None, }, ], - "weather": ["In the Vicinity Showers"], + "weather": ["Showers in the Vicinity"], "auto": False, }, }, @@ -662,4 +662,103 @@ "weather": ["Clear Sky"], }, }, + { + "kwargs": { + "text": "METAR RCNN 091100Z 20003KT 9999 VCSH FEW010 BKN040 29/25 Q1010 RMK A2984 VCSH W=", + "year": 2024, + "month": 10, + }, + "result": { + "kind": "METAR", + "icao": "RCNN", + "auto": False, + "datetime": "2024-10-09T11:00:00+00:00", + "wind_direction": 200, + "wind_direction_units": "degree", + "wind_speed": 1, + "wind_speed_units": "m/s", + "gust": None, + "wind_direction_range": None, + "visibility": 99999, + "visibility_units": "m", + "cavok": False, + "temperature": 29, + "dew_temperature": 25, + "temperature_units": "degree C", + "qnh": 1010, + "qnh_units": "hPa", + "cloud": [ + { + "cloud_mask": 0.75, + "cloud_height": 800, + "cloud_height_units": "m", + "cloud_type": None, + }, + { + "cloud_mask": 0.25, + "cloud_height": 200, + "cloud_height_units": "m", + "cloud_type": None, + }, + ], + "weather": ["Showers in the Vicinity"], # Only one, RMK content is ignored + }, + }, + { + "kwargs": { + "text": "METAR RCNN 092130Z 30003KT 6000 -RA VCTS FEW008 FEW012CB BKN016 BKN040 27/26 Q1009 BECMG FM2200 TL2300 TSRA RMK A2980 TS SW MOV N=", + "year": 2024, + "month": 10, + }, + "result": { + "kind": "METAR", + "icao": "RCNN", + "auto": False, + "datetime": "2024-10-09T21:30:00+00:00", + "wind_direction": 300, + "wind_direction_units": "degree", + "wind_speed": 1, + "wind_speed_units": "m/s", + "gust": None, + "wind_direction_range": None, + "visibility": 6000, + "visibility_units": "m", + "cavok": False, + "temperature": 27, + "dew_temperature": 26, + "temperature_units": "degree C", + "qnh": 1009, + "qnh_units": "hPa", + "cloud": [ + { + "cloud_mask": 0.75, + "cloud_height": 320, + "cloud_height_units": "m", + "cloud_type": None, + }, + { + "cloud_mask": 0.75, + "cloud_height": 800, + "cloud_height_units": "m", + "cloud_type": None, + }, + { + "cloud_mask": 0.25, + "cloud_height": 160, + "cloud_height_units": "m", + "cloud_type": None, + }, + { + "cloud_mask": 0.25, + "cloud_height": 240, + "cloud_height_units": "m", + "cloud_type": "cumulonimbus", + }, + ], + "weather": [ + "Light Rain", + "Thunderstorm in the Vicinity", + ], # VCTS with new word order, RMK ignored + }, + }, ] diff --git a/tests/test_validation.py b/tests/test_validation.py index 7a74c6e..82d2072 100644 --- a/tests/test_validation.py +++ b/tests/test_validation.py @@ -8,7 +8,7 @@ class TestValidation: """测试 validate_metar 函数""" - + def test_valid_metars(self): """测试合法的 METAR 报文""" valid_metars = [ @@ -18,12 +18,10 @@ def test_valid_metars(self): "METAR RCQC 301730Z NIL=", "SPECI ZBHD 311029Z 30009MPS 7000 -TSRA SCT030CB BKN046 25/17 Q1007 NOSIG=", "METAR RCMQ 010400Z 02020G30KT 9999 VCSH SCT004 BKN014 BKN040 15/09 Q1018 NOSIG=", - # TREND cases "METAR ZBAA 241400Z 14002MPS 090V210 9999 -TSRA SCT005 FEW033CB BKN040 25/24 Q1006 RESHRA BECMG TL1440 NSW=", "METAR ZBAA 310630Z 09002MPS 050V140 8000 -SHRA NSC 19/14 Q1007 TEMPO 2000 RA BR=", "METAR ZBAA 310630Z 09002MPS 050V140 8000 -SHRA NSC 19/14 Q1007 BECMG FM1630 TL1730 CAVOK=", - # RMK cases "METAR RCKH 040200Z 36005KT 2200 -DZ FEW006 BKN030 OVC050 12/09 Q1025 TEMPO 3200 RMK RA AMT T=", "METAR RCFN 290630Z 07009KT 030V110 9999 FEW015 FEW025TCU SCT080 BKN180 31/25 Q1006 NOSIG RMK TCU SW-W A2971=", @@ -31,16 +29,17 @@ def test_valid_metars(self): "METAR RCMQ 222000Z 34003KT 0300 -RA FG VV001 15/15 Q1011 RMK A2988 RA AMT T VIS S 0300M RVR N/A=", "METAR RCTP 150700Z 23003KT 2000 -DZ BR SCT005 BKN008 OVC030 21/20 Q1010 NOSIG RMK RA AMT T=", "METAR VMMC 220000Z 20008KT 9999 3500S FU FEW002 SCT010 26/25 Q1009 NOSIG RMK RWY 34 FU=", - # AUTO and missing data "METAR ZJSY 171900Z AUTO 12003MPS //// // ///////// 27/25 Q1006=", "METAR VMMC 230030Z 36017KT 330V030 6000 FEW020 BKN080 27/22 Q//// NOSIG=", # Q//// missing data ] - + for metar in valid_metars: is_valid, error_msg = validate_metar(metar) - assert is_valid, f"Valid METAR incorrectly identified as invalid: {metar}\nError: {error_msg}" - + assert ( + is_valid + ), f"Valid METAR incorrectly identified as invalid: {metar}\nError: {error_msg}" + def test_invalid_qnh_format(self): """测试气压组格式错误""" invalid_metars = [ @@ -49,24 +48,27 @@ def test_invalid_qnh_format(self): ("METAR ZGOW 132100Z 31004MPS 7000 NSC 06/03 Q10 NOSIG=", "Q10"), ("METAR ZBYN 031600Z 10002MPS CAVOK 14/M03 Q101=", "Q101"), ] - + for metar, expected_field in invalid_metars: is_valid, error_msg = validate_metar(metar) assert not is_valid, f"Invalid QNH not detected in: {metar}" assert expected_field in error_msg or "QNH" in error_msg - + def test_invalid_time_format(self): """测试时间组格式错误""" invalid_metars = [ - ("METAR ZGSZ 551800Z AUTO 17004MPS //// // ////// 29/28 Q1004 NOSIG=", 55), # 日期错误 + ( + "METAR ZGSZ 551800Z AUTO 17004MPS //// // ////// 29/28 Q1004 NOSIG=", + 55, + ), # 日期错误 ("ZBTJ 17004MPS 5000 FU SKC 11/M02 Q1015 NOSIG=", "17004MPS"), # 缺少时间组 ("ZSSS 022000 14003MPS CAVOK 15/10 1014=", "022000"), # 时间组格式异常 ] - + for metar, expected in invalid_metars: is_valid, error_msg = validate_metar(metar) assert not is_valid, f"Invalid time format not detected in: {metar}" - + def test_invalid_wind_format(self): """测试风组格式错误""" invalid_metars = [ @@ -74,11 +76,11 @@ def test_invalid_wind_format(self): "METAR ZSSS 151100Z 0003MPS 2500 HZ SKC 03/M07 Q1025 NOSIG=", # 0003MPS错误 "METAR ZSSS 151700Z 1003MPS 6000 SKC M01/M05 Q1027 NOSIG=", # 1003MPS错误 ] - + for metar in invalid_metars: is_valid, error_msg = validate_metar(metar) assert not is_valid, f"Invalid wind format not detected in: {metar}" - + def test_invalid_characters(self): """测试包含非法字符""" invalid_metars = [ @@ -87,65 +89,70 @@ def test_invalid_characters(self): "METAR ZBTJ 230700Z 33006MPS CAVOK 14/M34 Q1016 NOSIG.=", # . "METAR ZYTL 300700Z (8 4.0' :-=", # ( . ' : ] - + for metar in invalid_metars: is_valid, error_msg = validate_metar(metar) assert not is_valid, f"Invalid characters not detected in: {metar}" assert "invalid characters" in error_msg.lower() - + def test_invalid_endings(self): """测试异常的末尾字段""" invalid_metars = [ ("METAR ZGOW 140900Z 08001MPS CAVOK 14/04 Q1018 NOSI=", "NOSI"), ("ZSAM 280100Z VRB02MPS 9999 BKN026 OVC050 18/14 Q1016 OSIG=", "OSIG"), - ("ZGSZ 030400Z 09003MPS 5000 -RA BR SCT010 OVC030 23/22 Q1013 NOSIG DUPE=", "DUPE"), + ( + "ZGSZ 030400Z 09003MPS 5000 -RA BR SCT010 OVC030 23/22 Q1013 NOSIG DUPE=", + "DUPE", + ), ("ZLXY 112300Z 01002MPS 3000 DU SKC 17/06 Q1009 TE G=", "TE G"), ] - + for metar, expected_ending in invalid_metars: is_valid, error_msg = validate_metar(metar) assert not is_valid, f"Invalid ending not detected in: {metar}" - + def test_isolated_values(self): """测试孤立的数字或字符""" invalid_metars = [ "METAR ZGOW 140100Z 33006MPS CAVOK 003 Q1023 NOSIG=", # 孤立的003 "ZLXY 050900Z 2 09005MPS CAVOK 31/27 Q1004 NOSIG=", # 孤立数字2 ] - + for metar in invalid_metars: is_valid, error_msg = validate_metar(metar) assert not is_valid, f"Isolated value not detected in: {metar}" - + def test_invalid_cloud_format(self): """测试错误的云组格式""" metar = "METAR ZGGG 110500Z 35003MPS 310V030 1100 R03/P1500 -SHRA BR FEW026TCU KN026 19/17 Q1012 TEMPO 1500 SHRA SCT025CB OVC030=" is_valid, error_msg = validate_metar(metar) assert not is_valid, f"Invalid cloud format (KN026) not detected" assert "KN026" in error_msg - + def test_short_text(self): """测试报文太短""" invalid_metars = [ "ZYTX 0103=", "ZSSS 302OH=", ] - + for metar in invalid_metars: is_valid, error_msg = validate_metar(metar) assert not is_valid, f"Short text not detected in: {metar}" - + def test_nil_reports(self): """测试 NIL 报文""" valid_nil_metars = [ "METAR RCQC 301730Z NIL=", "METAR RCMQ 080500Z NIL", ] - + for metar in valid_nil_metars: is_valid, error_msg = validate_metar(metar) - assert is_valid, f"Valid NIL METAR incorrectly identified as invalid: {metar}" - + assert ( + is_valid + ), f"Valid NIL METAR incorrectly identified as invalid: {metar}" + def test_trend_validation(self): """测试趋势报验证""" # Valid TREND cases @@ -155,23 +162,37 @@ def test_trend_validation(self): "METAR ZBAA 310630Z 09002MPS 8000 -SHRA NSC Q1007 BECMG FM1630 TL1730 CAVOK=", "METAR RCKH 040200Z 36005KT 2200 -DZ FEW006 Q1025 TEMPO 3200 RMK RA AMT T=", # TEMPO with visibility only ] - + for metar in valid_trends: is_valid, error_msg = validate_metar(metar) - assert is_valid, f"Valid TREND METAR incorrectly identified as invalid: {metar}\nError: {error_msg}" - + assert ( + is_valid + ), f"Valid TREND METAR incorrectly identified as invalid: {metar}\nError: {error_msg}" + # Invalid TREND cases invalid_trends = [ - ("METAR RCMQ 250430Z 01013KT 9000 VCSH SCT003 Q1023 FM0430 8000 -RA RMK A3023 VCSH NE=", "FM without BECMG/TEMPO"), - ("METAR ZSFZ 120400Z 04005MPS 3800 -TSRA BR BKN003 SCT020CB OVC033 21/20 Q1010 FM0530 -SHRA BKN010 FEW020CB OVC040=", "FM without BECMG/TEMPO"), - ("METAR ZBAA 310630Z 09002MPS 8000 -SHRA NSC Q1007 TEMPO R06/0800U=", "RVR in TREND"), - ("METAR ZBAA 310630Z 09002MPS 8000 -SHRA NSC Q1007 BECMG Q1012=", "QNH in TREND"), + ( + "METAR RCMQ 250430Z 01013KT 9000 VCSH SCT003 Q1023 FM0430 8000 -RA RMK A3023 VCSH NE=", + "FM without BECMG/TEMPO", + ), + ( + "METAR ZSFZ 120400Z 04005MPS 3800 -TSRA BR BKN003 SCT020CB OVC033 21/20 Q1010 FM0530 -SHRA BKN010 FEW020CB OVC040=", + "FM without BECMG/TEMPO", + ), + ( + "METAR ZBAA 310630Z 09002MPS 8000 -SHRA NSC Q1007 TEMPO R06/0800U=", + "RVR in TREND", + ), + ( + "METAR ZBAA 310630Z 09002MPS 8000 -SHRA NSC Q1007 BECMG Q1012=", + "QNH in TREND", + ), ] - + for metar, description in invalid_trends: is_valid, error_msg = validate_metar(metar) assert not is_valid, f"Invalid TREND not detected: {metar} ({description})" - + def test_rmk_free_text(self): """测试 RMK 自由文本""" # Valid RMK cases - RMK is free text, various content allowed @@ -184,22 +205,24 @@ def test_rmk_free_text(self): "METAR RCNN 211400Z 09008KT 9999 VCSH SCT012 Q1008 NOSIG RMK A2981 CB N-NE=", # Direction range "METAR RCMQ 230900Z 25008KT 9999 VCSH FEW010 Q1009 NOSIG RMK A2982 VCSH E TCU E=", # Multiple directions ] - + for metar in valid_rmk: is_valid, error_msg = validate_metar(metar) - assert is_valid, f"Valid RMK METAR incorrectly identified as invalid: {metar}\nError: {error_msg}" - + assert ( + is_valid + ), f"Valid RMK METAR incorrectly identified as invalid: {metar}\nError: {error_msg}" + # Invalid: TREND in RMK (position error) invalid_rmk = [ "METAR RCKH 192000Z 06004KT 6000 FEW015 Q1019 NOSIG RMK A3009 BECMG 4500 BR=", "METAR ZBAA 192000Z 06004KT 6000 FEW015 Q1019 NOSIG RMK A3009 TEMPO 4500 BR=", ] - + for metar in invalid_rmk: is_valid, error_msg = validate_metar(metar) assert not is_valid, f"TREND in RMK not detected: {metar}" assert "TREND keyword" in error_msg and "RMK section" in error_msg - + def test_auto_and_missing_data(self): """测试 AUTO 和缺测数据""" valid_metars = [ @@ -207,43 +230,105 @@ def test_auto_and_missing_data(self): "METAR VMMC 230030Z 36017KT 330V030 6000 FEW020 BKN080 27/22 Q//// NOSIG=", "METAR ZYQQ 081700Z AUTO /////MPS //// // ////// M05/M07 Q1006=", ] - + for metar in valid_metars: is_valid, error_msg = validate_metar(metar) - assert is_valid, f"Valid AUTO/missing data METAR incorrectly identified as invalid: {metar}\nError: {error_msg}" - + assert ( + is_valid + ), f"Valid AUTO/missing data METAR incorrectly identified as invalid: {metar}\nError: {error_msg}" + def test_spelling_errors(self): """测试拼写错误""" spelling_errors = [ - ("METAR VHHH 280100Z 09008KT 060V160 7000 FEW008 Q1011 EMPO 4000 SHRA=", "EMPO"), + ( + "METAR VHHH 280100Z 09008KT 060V160 7000 FEW008 Q1011 EMPO 4000 SHRA=", + "EMPO", + ), ("METAR ZSHC 270130Z VRB02MPS 2500 BR NSC Q1032 ECMG 3000 BR=", "ECMG"), - ("METAR RCMQ 311200Z 16005KT 6000 -RA SCT003 Q1009 BCECMG TL1200 6000 -RA BKN016 RMK A2982=", "BCECMG"), - ("METAR ZBYN 191330Z 00000MPS 5000 -RA BR SCT033 25/23 Q1006 TRMPO 2500 RA BR=", "TRMPO"), + ( + "METAR RCMQ 311200Z 16005KT 6000 -RA SCT003 Q1009 BCECMG TL1200 6000 -RA BKN016 RMK A2982=", + "BCECMG", + ), + ( + "METAR ZBYN 191330Z 00000MPS 5000 -RA BR SCT033 25/23 Q1006 TRMPO 2500 RA BR=", + "TRMPO", + ), + # NOSIG double letter errors + ("METAR ZBAA 141400Z 14002MPS 9999 SCT005 Q1006 NNOSIG=", "NNOSIG"), + ("METAR ZBAA 141400Z 14002MPS 9999 SCT005 Q1006 NOSSIG=", "NOSSIG"), # BECMG spelling errors - ("METAR ZSOF 172000Z 08002MPS 3500 BR NSC 11/09 Q1025 BCNG TL2100 2500=", "BCNG"), - ("METAR ZSOF 171900Z 07002MPS 4000 BR NSC 11/09 Q1025 BECMFG TL2100 2500=", "BECMFG"), - ("METAR ZSOF 132200Z 29002MPS 2600 BR NSC 10/07 Q1024 BECMGG TL2330 3000=", "BECMGG"), - ("METAR ZSOF 200100Z 34002MPS 300V040 1400 R33/1400U -RA BR BKN005 OVC040 10/09 Q1022 BECMGA AT0300 1500=", "BECMGA"), - ("METAR ZSOF 302000Z 01001MPS 3000 BR FEW046 21/21 Q1009 BGECMG TL2100 2000=", "BGECMG"), - ("METAR ZSNJ 012200Z 07002MPS 2200 BR NSC 03/02 Q1028 BECGG TL2330 3000=", "BECGG"), - ("METAR ZSNJ 131400Z 00000MPS 2000 R06/0900V1700N BR NSC 01/M01 Q1027 BEEMG TL1530 1400=", "BEEMG"), - ("METAR ZSNJ 252200Z 06002MPS 1200 R24/1100N R25/1300N BR FEW006 SCT023 23/22 Q1014 BEMG TL2330 2000=", "BEMG"), - ("METAR ZSNJ 192200Z 06001MPS 1700 R06/1000V1800U R07/P2000 BR NSC 16/16 Q1013 MECMG TL2330 3000 HZ=", "MECMG"), - ("METAR ZSNJ 250900Z 26003MPS 3000 HZ FEW029 07/M02 Q1023 BECMF TL1030 2500=", "BECMF"), - ("METAR ZSNJ 251000Z 24002MPS 210V280 8000 BKN010 OVC026 22/20 Q1011 BECMGM TL1130 BKN020=", "BECMGM"), + ( + "METAR ZSOF 172000Z 08002MPS 3500 BR NSC 11/09 Q1025 BCNG TL2100 2500=", + "BCNG", + ), + ( + "METAR ZSOF 171900Z 07002MPS 4000 BR NSC 11/09 Q1025 BECMFG TL2100 2500=", + "BECMFG", + ), + ( + "METAR ZSOF 132200Z 29002MPS 2600 BR NSC 10/07 Q1024 BECMGG TL2330 3000=", + "BECMGG", + ), + ( + "METAR ZSOF 200100Z 34002MPS 300V040 1400 R33/1400U -RA BR BKN005 OVC040 10/09 Q1022 BECMGA AT0300 1500=", + "BECMGA", + ), + ( + "METAR ZSOF 302000Z 01001MPS 3000 BR FEW046 21/21 Q1009 BGECMG TL2100 2000=", + "BGECMG", + ), + ( + "METAR ZSNJ 012200Z 07002MPS 2200 BR NSC 03/02 Q1028 BECGG TL2330 3000=", + "BECGG", + ), + ( + "METAR ZSNJ 131400Z 00000MPS 2000 R06/0900V1700N BR NSC 01/M01 Q1027 BEEMG TL1530 1400=", + "BEEMG", + ), + ( + "METAR ZSNJ 252200Z 06002MPS 1200 R24/1100N R25/1300N BR FEW006 SCT023 23/22 Q1014 BEMG TL2330 2000=", + "BEMG", + ), + ( + "METAR ZSNJ 192200Z 06001MPS 1700 R06/1000V1800U R07/P2000 BR NSC 16/16 Q1013 MECMG TL2330 3000 HZ=", + "MECMG", + ), + ( + "METAR ZSNJ 250900Z 26003MPS 3000 HZ FEW029 07/M02 Q1023 BECMF TL1030 2500=", + "BECMF", + ), + ( + "METAR ZSNJ 251000Z 24002MPS 210V280 8000 BKN010 OVC026 22/20 Q1011 BECMGM TL1130 BKN020=", + "BECMGM", + ), # More BECMG spelling errors - ("METAR ZSNB 071500Z 19005MPS 2000 +TSRA BR FEW009 BKN016 FEW033CB OVC050 24/24 Q1000 BCEMG TL1630 3000 -SHRA BR=", "BCEMG"), - ("METAR ZSNB 122100Z VRB01MPS 4000 BR SCT033 09/06 Q1021 BCEMG TL2230 2500=", "BCEMG"), - ("METAR ZSFZ 241800Z 33002MPS 5000 HZ NSC 19/09 Q1014 BECNG TL1930 2900=", "BECNG"), - ("METAR ZSFZ 241700Z 02002MPS 6000 NSC 20/09 Q1014 BECNG TL1830 2900=", "BECNG"), - ("METAR ZUGY 100700Z 03004MPS 9999 SCT010 BKN020 OVC033 21/20 Q1010 BECML TL0730 -TSRA=", "BECML"), + ( + "METAR ZSNB 071500Z 19005MPS 2000 +TSRA BR FEW009 BKN016 FEW033CB OVC050 24/24 Q1000 BCEMG TL1630 3000 -SHRA BR=", + "BCEMG", + ), + ( + "METAR ZSNB 122100Z VRB01MPS 4000 BR SCT033 09/06 Q1021 BCEMG TL2230 2500=", + "BCEMG", + ), + ( + "METAR ZSFZ 241800Z 33002MPS 5000 HZ NSC 19/09 Q1014 BECNG TL1930 2900=", + "BECNG", + ), + ( + "METAR ZSFZ 241700Z 02002MPS 6000 NSC 20/09 Q1014 BECNG TL1830 2900=", + "BECNG", + ), + ( + "METAR ZUGY 100700Z 03004MPS 9999 SCT010 BKN020 OVC033 21/20 Q1010 BECML TL0730 -TSRA=", + "BECML", + ), ] - + for metar, expected_error in spelling_errors: is_valid, error_msg = validate_metar(metar) assert not is_valid, f"Spelling error not detected: {metar}" assert expected_error in error_msg or "Spelling error" in error_msg - + def test_complex_weather_phenomena(self): """测试复杂天气现象组合""" # Valid complex weather phenomenon codes @@ -253,11 +338,13 @@ def test_complex_weather_phenomena(self): "METAR ZUGY 241500Z 06003MPS 7000 -SHRASN FEW004 BKN015 FEW020TCU OVC026 01/M00 Q1024 NOSIG=", # -SHRASN "METAR ZUGY 241300Z 04005MPS 9999 -TSRASN FEW005 BKN015 FEW023CB OVC030 02/01 Q1023 BECMG TL1430 NSW=", # -TSRASN ] - + for metar in complex_weather: is_valid, error_msg = validate_metar(metar) - assert is_valid, f"Valid complex weather METAR incorrectly identified as invalid: {metar}\nError: {error_msg}" - + assert ( + is_valid + ), f"Valid complex weather METAR incorrectly identified as invalid: {metar}\nError: {error_msg}" + def test_suspicious_fields(self): """测试可疑的异常字段""" # Invalid fields that should be detected @@ -266,9 +353,231 @@ def test_suspicious_fields(self): "METAR ZPPP 161600Z 02002MPS 9999 SCT040 OCCGCRY QUXQQ Q1019 NOSIG=", # OCCGCRY and QUXQQ "METAR ZYTX 241500Z 14002MPS CASACI32 ZBBB 241500=", # CASACI32 invalid field ] - + for metar in suspicious_metars: is_valid, error_msg = validate_metar(metar) assert not is_valid, f"Suspicious field not detected: {metar}" assert "Suspicious field" in error_msg or "QNH" in error_msg + def test_additional_validations(self): + """测试额外的验证规则""" + # 孤立数字末尾 + isolated_digit = [ + "METAR ZGOW 191600Z 00000MPS 3000 BR NSC 22/21 Q1017 NOSIG 9 =", + "METAR ZGOW 191600Z 00000MPS 3000 BR NSC 22/21 Q1017 NOSIG 3 =", + ] + + for metar in isolated_digit: + is_valid, error_msg = validate_metar(metar) + assert not is_valid, f"Isolated digit not detected: {metar}" + assert "Isolated digit at ending" in error_msg + + # 可疑短字段 (数字+字母) + suspicious_short = [ + "METAR ZSOF 170300Z VRB02MPS CAVOK 0K 20/10 Q1038 NOSIG=", + "METAR ZSOF 170300Z VRB02MPS CAVOK 1A 20/10 Q1038 NOSIG=", + ] + + for metar in suspicious_short: + is_valid, error_msg = validate_metar(metar) + assert not is_valid, f"Suspicious short field not detected: {metar}" + assert "Suspicious field" in error_msg + + # 温度格式错误 (+ 前缀) + invalid_temp = [ + "METAR ZSOF 170300Z VRB02MPS CAVOK 20/10 +3/M12 Q1038 NOSIG=", + ] + + for metar in invalid_temp: + is_valid, error_msg = validate_metar(metar) + assert not is_valid, f"Invalid temperature format not detected: {metar}" + assert "Invalid temperature format" in error_msg + + # NOSIG 拼写变体 + nosig_variants = [ + "METAR ZUGY 240500Z 02002MPS 7000 NSC 13/M01 Q1028 NOAISIG=", + "METAR ZUGY 240500Z 02002MPS 7000 NSC 13/M01 Q1028 NOAI SIG=", + "METAR ZYTX 230330Z 20003MPS 170V250 6000 NSC 02/M12 Q1022 NOSZ CHECK TEXT NEW ENDING ADDED ZBBBXMXX=", # NOSZ + TREND乱码 + ] + + for metar in nosig_variants: + is_valid, error_msg = validate_metar(metar) + assert not is_valid, f"NOSIG variant not detected: {metar}" + assert "NOAI" in error_msg or "NOSZ" in error_msg + + # 能见度格式错误 + invalid_vis = [ + "METAR ZYTL 301930Z 30003MPS 60008P FEW033 M04/M10 Q1026 NOSIG=", + "METAR ZYTL 301930Z 30003MPS 12345AB FEW033 M04/M10 Q1026 NOSIG=", + ] + + for metar in invalid_vis: + is_valid, error_msg = validate_metar(metar) + assert not is_valid, f"Invalid visibility not detected: {metar}" + assert "Invalid visibility format" in error_msg + + # TREND 中的可疑字段 + trend_suspicious = [ + "METAR ZUUU 092200Z 00000MPS 2700 BR NSC 02/02 Q1023 BECMG TL2350 K JHHHHH=", + "METAR ZUUU 092200Z 00000MPS 2700 BR NSC 02/02 Q1023 BECMG TL2350 CHECK TEXT=", + ] + + for metar in trend_suspicious: + is_valid, error_msg = validate_metar(metar) + assert not is_valid, f"Suspicious TREND field not detected: {metar}" + assert "Suspicious field in TREND" in error_msg + + def test_fixable_errors(self): + """测试可修复的错误(这些报文在修复前应该是异常的)""" + # VR/VRB 前导零 + vr_errors = [ + "METAR RCSS 241530Z VR001KT 9999 -RA FEW008 BKN100 26/24 Q1013 NOSIG=", # VR001KT + "METAR ZLLL 060500Z VRB002MPS CAVOK 06/M19 Q1024 NOSIG=", # VRB002MPS + ] + + for metar in vr_errors: + is_valid, error_msg = validate_metar(metar) + assert not is_valid, f"VR/VRB error not detected: {metar}" + + # 云组高度错误、温度格式错误、BECMG粘连 + combined_errors = [ + "METAR ZGHA 280100Z VRB01MPS 0300 FG BKN0 20/10 Q1022 NOSIG=", # BKN0 (1位高度) + "METAR ZGHA 280100Z VRB01MPS 0300 FG BKN020 0/10 Q1022 NOSIG=", # 0/10 (温度位数不足) + "METAR ZGHA 280100Z VRB01MPS 0300 FG BKN020 20/10 Q1022 BECMGTL0130 0900=", # BECMGTL0130 (粘连) + "METAR ZGHA 280100Z VRB01MPS 0300 R18R/0550V0700N R18L/0500V0700U FG BKN0 0/10 Q1022 BECMGTL0130 0900=", # 综合错误 + ] + for metar in combined_errors: + is_valid, error_msg = validate_metar(metar) + assert not is_valid, f"Error not detected: {metar}\nError: {error_msg}" + + # 风组格式错误(不完整阵风、错误单位) + wind_errors = [ + "SPECI RCMQ 270025Z 000G UKT 2400 BR BKN002 BKN008 27/27 Q1010 BECMG TL0040 3000 BR BKN002 RMK A2984=", # 000G UKT + "METAR ZYTL 020900Z 01006M=", # 01006M (不完整单位) + ] + for metar in wind_errors: + is_valid, error_msg = validate_metar(metar) + assert not is_valid, f"Wind error not detected: {metar}\nError: {error_msg}" + assert "Invalid wind format" in error_msg + + # 报文太短(缺少观测数据) + too_short = [ + "METAR ZBYN 100800Z 29006MPS=", + ] + for metar in too_short: + is_valid, error_msg = validate_metar(metar) + assert not is_valid, f"Too short not detected: {metar}\nError: {error_msg}" + assert "Missing observation data" in error_msg + + # 无效字段(DUPE等) + invalid_field_errors = [ + "METAR ZGSZ 180700Z 16004MPS 9999 SCT018 BKN050 27/25 Q1007 NOSIG DUPE=", + ] + for metar in invalid_field_errors: + is_valid, error_msg = validate_metar(metar) + assert ( + not is_valid + ), f"Invalid field not detected: {metar}\nError: {error_msg}" + assert "DUPE" in error_msg + + # 重复报头 + duplicate_headers = [ + "METAR ZPPP METAR ZPPP 280600Z 24007MPS 9999 SCT030 25/04 Q1013 NOSIG=", + "METAR METAR ZBAA 111400Z 15001MPS 4000 BR BKN023 M02/M05 Q1030 NOSIG=", + "METAR SPECI ZSHC 190608Z 27006MPS 9999 SHRA BKN023 FEW023TCU 30/25 Q1003 NOSIG=", + ] + + for metar in duplicate_headers: + is_valid, error_msg = validate_metar(metar) + assert not is_valid, f"Duplicate header not detected: {metar}" + + # COR 位置错误 + cor_position = [ + "METAR VHHH COR 140730Z 12008KT 090V180 CAVOK 24/11 Q1015 WS RWY07L NOSIG=", + ] + + for metar in cor_position: + is_valid, error_msg = validate_metar(metar) + assert not is_valid, f"COR position error not detected: {metar}" + + # 风速单位空格分隔 + wind_spacing = [ + "METAR ZGSZ 100800Z 21003M P S 170V230 5000 HZ SKC 28/19 Q1014 NOSIG=", + ] + + for metar in wind_spacing: + is_valid, error_msg = validate_metar(metar) + assert not is_valid, f"Wind spacing error not detected: {metar}" + + # 风组格式错误 + wind_format = [ + "METAR ZYTX 151300Z 1800C 41MPS 6000 NSC M13/M19 Q1030 NOSIG=", # 1800C + 41MPS + ] + + for metar in wind_format: + is_valid, error_msg = validate_metar(metar) + assert not is_valid, f"Wind format error not detected: {metar}" + assert "Invalid wind format" in error_msg + + # QNH 相关错误 + qnh_errors = [ + "METAR RCKH 030400Z 34007KT 7000 FEW012 SCT020 BKN060 22/13 Q1 012 NOSIG=", # Q1 012 + "METAR ZSNJ 262000Z 04002MPS 3000 BR NSC 20/18 Q1016N NOSIG=", # Q1016N + "METAR ZJHK 280830Z 32006MPS 5000 BR NSC 09/M02 Q1028NOSIT=", # Q1028NOSIT + "METAR ZJHK 280830Z 32006MPS 5000 BR NSC 09/M02 Q1011BECMG TL0930 3000=", # Q1011BECMG + "METAR ZJHK 280830Z 32006MPS 5000 BR NSC 09/M02 Q1010NOSIG=", # Q1010NOSIG + ] + + for metar in qnh_errors: + is_valid, error_msg = validate_metar(metar) + assert not is_valid, f"QNH error not detected: {metar}" + + # NOSIG 间距错误 + nosig_spacing = [ + "METAR ZSQD 110100Z 20001MPS 6000 SKC M01/M07 Q1038 N NOSIG=", # N NOSIG + "METAR ZJHK 280830Z 32006MPS 5000 BR NSC 09/M02 Q1020 NOS IG=", # NOS IG + ] + + for metar in nosig_spacing: + is_valid, error_msg = validate_metar(metar) + assert not is_valid, f"NOSIG spacing error not detected: {metar}" + + # 云组拼写错误 + cloud_errors = [ + "METAR ZJHK 280830Z 32006MPS 5000 FE023 09/M02 Q1020 NOSIG=", # FE023 + "METAR ZJHK 280830Z 32006MPS 5000 BCF000 09/M02 Q1020 NOSIG=", # BCF000 (只有1个字符匹配,不应该修复) + ] + + for metar in cloud_errors: + is_valid, error_msg = validate_metar(metar) + assert not is_valid, f"Cloud spelling error not detected: {metar}" + + # 嵌入报文 + embedded = [ + "METAR ZSAM 250400Z 03003MPS 9999 FE023 32/23 Q1002 NOSIG SQD 250400Z 02004MPS 9999 BKN023 29/20 Q1004 NOSIG ZUUU NI=", + ] + + for metar in embedded: + is_valid, error_msg = validate_metar(metar) + assert not is_valid, f"Embedded reports not detected: {metar}" + + # 问号 + question_marks = [ + "METAR RCNN 250000Z 02002KT 9999 FEW012 SCT040 BKN120? 28/23 Q1012 NOSIG=", + "METAR RCNN 270730Z 30005G?KT 6000 -TSRA SCT010 FEW012CB BKN021 BKN040 27/24 Q1009 NOSIG=", + "METAR RCMQ 222000Z 02020G30KT 9999 -RA FEW010?BKN020 BKN040 20/16 Q1008 RMK A2979=", + ] + + for metar in question_marks: + is_valid, error_msg = validate_metar(metar) + assert not is_valid, f"Question mark not detected: {metar}" + + # 缺少报文头 + missing_header = [ + "ZBAA 111200Z 15001MPS 4000 BR BKN023 M02/M05 Q1030 NOSIG=", + "ZBAA 111215Z 15001MPS 4000 BR BKN023 M02/M05 Q1030 NOSIG=", + ] + + for metar in missing_header: + is_valid, error_msg = validate_metar(metar) + assert not is_valid, f"Missing header not detected: {metar}" From 2eee0a8f0f616f5047944c602eba65c127a5d66d Mon Sep 17 00:00:00 2001 From: Clarmy Lee Date: Thu, 16 Oct 2025 18:39:55 +0800 Subject: [PATCH 2/2] chore: bump version to 1.1.1 --- pymetaf/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pymetaf/__init__.py b/pymetaf/__init__.py index e67ca92..4c76841 100644 --- a/pymetaf/__init__.py +++ b/pymetaf/__init__.py @@ -1,3 +1,3 @@ -__version__ = "1.1.0" +__version__ = "1.1.1" from .parser import *