#!/usr/bin/env python3
# SPDX-License-Identifier: GPL-2.0
"""
lnet_legacy2yaml

Convert legacy (deprecated) Lustre LNet ip2nets, routes, and networks module parameters
into the YAML format accepted by `lnetctl import` (introduced Lustre 2.10+).

Usage:
  A) Pass a legacy ip2nets string directly:
      ./lnet_legacy2yaml 'tcp0(eth0,eth1) 10.0.0.*; o2ib0 192.168.[1-2].*'

  B) Pass a full legacy options line (the tool will extract ip2nets=..., routes=..., and networks=...):
      ./lnet_legacy2yaml "options lnet 'ip2nets=\"tcp0 10.0.0.*\" routes=...';"

  C) Read from stdin (omit positional argument):
      echo 'tcp(eth0) 192.168.0.[2,4]; tcp 192.168.0.*' | ./lnet_legacy2yaml

  D) Provide separate files for ip2nets, routes, and/or networks:
      ./lnet_legacy2yaml -i /path/ip2nets.dat -r /path/routes.dat -n /path/networks.dat

  E) Mix explicit files with an options line (file contents are appended):
      ./lnet_legacy2yaml -i ip.rules "options lnet 'routes=\"tcp0 1 nid@o2ib0\"'"

  F) Parse a modprobe configuration file for lnet parameters:
      ./lnet_legacy2yaml -m /etc/modprobe.d/lnet.conf

  G) Force unspecified route hopcounts to 1 (use when your legacy config
     assumes an implicit 1 hop but omits it):
      ./lnet_legacy2yaml -r routes.dat --default-hop

  H) Force unspecified route hopcounts to a specific value (e.g. 2):
      ./lnet_legacy2yaml -r routes.dat --default-hop 2

  I) Convert networks parameter:
      ./lnet_legacy2yaml "options lnet networks=tcp0(eth0),o2ib(ib0)"

The script prints a YAML document to stdout:

ip2nets:
  - net-spec: tcp0
    interfaces:
         0: eth0
    ip-range:
         0: 192.168.0.[2,4]
  - net-spec: tcp
    ip-range:
         0: 192.168.0.*

net:
-     net type: tcp
      local NI(s):
      -     interfaces:
                  0: eth0

Notes / Assumptions:
  * We preserve rule order.
  * Interface order is preserved as given.
  * Multiple IP patterns in one rule are enumerated (0,1,2,...).
  * Comments start at the first '#' and run to the end of the rule (the next
    ';' or newline); they are stripped. Quoting is not taken into account.
  * Rule separators: semicolon ';' or newline. A trailing semicolon is fine.
  * We do only light validation of IP range tokens; anything matching the
    allowed character class for r-expr segments is accepted.
  * Patterns are not expanded—left exactly as provided for user-space matcher.
  * Routes can be specified in two formats:
    - Legacy: <network> [<hop>] <gateway>[@<exit>][:<priority>]
    - Alternative: <network>: { gateway: <gateway>@<exit> [hop: <hop>] [priority: <priority>] }
      (The alternative format is used by lustre_routes_config script)
  * Networks parameter format: comma-separated list of <nettype>[<number>][(<iface-list>)]
    where <iface-list> is a comma-separated list of interface names.

Exit codes:
  0 success
  1 usage / argument error
  2 parse error (with message on stderr)

Author: (generated by automation) | Version: 1.0 (canonical script name lnet_legacy2yaml)
Python compatibility: 3.6+ (avoids __future__ annotations & PEP 585 generics)
"""

import re
import sys
import argparse
from typing import List, Optional, Dict

# Generic regex to detect and extract quoted parameter blocks (ip2nets/routes/networks).
# The body is matched lazily and terminated by a backreference to the opening
# quote character, so it ends at the first occurrence of that same quote;
# quotes of the opposite type may appear inside the body. re.DOTALL lets a
# quoted body span several lines.
PARAM_ASSIGN_RE = re.compile(r"(?P<name>ip2nets|routes|networks)\s*=\s*(?P<quote>['\"])(?P<body>.*?)(?P=quote)", re.DOTALL)

# Regex to extract unquoted parameter values (for modprobe files without quotes).
# The body must not start with a quote and never contains a newline or a
# semicolon. It is matched lazily and stops before the next "<word>=" assignment,
# before a semicolon, or before trailing whitespace at the end of the input
# (re.MULTILINE is not set, so '$' does not match at every line end).
PARAM_ASSIGN_UNQUOTED_RE = re.compile(r"(?P<name>ip2nets|routes|networks)\s*=\s*(?P<body>[^'\";\n][^\n;]*?)(?=\s+\w+\s*=|\s*;|\s*$)", re.DOTALL)

# Regex to parse a single rule: <net-spec> <ip-range>+
# <net-spec> ::= <network>[ ( <iface-list> ) ]
# <network>  ::= <nettype><number?>  (nettype letters, number optional)
# Allow network names with embedded digits (e.g., o2ib, o2ib0, tcp1) by accepting
# an initial alphabetic character followed by alphanumerics. This is looser than
# the previous pattern which rejected names like 'o2ib0'.
# Groups: 'network' (name), 'ifaces' (raw text between the parentheses, or None
# when no interface list is given) and 'rest' (everything after the mandatory
# whitespace, i.e. the IP pattern tokens).
RULE_HEADER_RE = re.compile(
    r"^\s*(?P<network>[A-Za-z][A-Za-z0-9]*)(?:\((?P<ifaces>[A-Za-z0-9_,.:/-]+)\))?\s+(?P<rest>.+?)\s*$"
)

# A loose matcher for an IP pattern token of the legacy form.
# (We do not deeply validate numeric bounds; we only assert allowed chars and
# at least 3 dots. '.' is itself part of the allowed character class, so a
# token with more than three dots is accepted as well.)
IP_PATTERN_RE = re.compile(r"^[A-Za-z0-9\[*\].,/\-]+\.[A-Za-z0-9\[*\].,/\-]+\.[A-Za-z0-9\[*\].,/\-]+\.[A-Za-z0-9\[*\].,/\-]+$")


class Rule:
    """Represents an ip2nets rule with net-spec, interfaces, and IP ranges.

    Attributes:
        net_spec: Network name of the rule, e.g. ``tcp0``.
        interfaces: Interface names in the order given (may be empty).
        ip_ranges: Legacy IP pattern tokens in the order given.
    """

    def __init__(self, net_spec, interfaces=None, ip_ranges=None):
        # type: (str, Optional[List[str]], Optional[List[str]]) -> None
        self.net_spec = net_spec
        # None defaults (rather than []) keep instances from sharing one list.
        self.interfaces = interfaces if interfaces is not None else []
        self.ip_ranges = ip_ranges if ip_ranges is not None else []

    def to_yaml_lines(self, idx_pad=9):
        # type: (int) -> List[str]
        """Render this rule as the lines of one ``ip2nets`` list item.

        Args:
            idx_pad: Number of spaces placed before each enumerated
                ``<index>: <value>`` entry. The default of 9 reproduces the
                layout shown in the module documentation. The value should be
                larger than the 4-column indent of the ``interfaces:`` and
                ``ip-range:`` keys so the entries stay nested under them.

        Returns:
            The YAML lines, without trailing newlines. ``interfaces:`` is
            omitted when the rule has no interfaces; ``ip-range:`` is always
            emitted. IP patterns are passed through ``yaml_quote_if_needed``.
        """
        pad = " " * idx_pad
        lines = [f"  - net-spec: {self.net_spec}"]
        if self.interfaces:
            lines.append("    interfaces:")
            lines.extend(
                f"{pad}{i}: {iface}" for i, iface in enumerate(self.interfaces)
            )
        lines.append("    ip-range:")
        lines.extend(
            f"{pad}{i}: {yaml_quote_if_needed(rng)}"
            for i, rng in enumerate(self.ip_ranges)
        )
        return lines


def extract_param_values(raw: str) -> Dict[str, List[str]]:
    """Extract ip2nets/routes/networks assignment bodies from raw text.

    Quoted assignments (``name="..."`` or ``name='...'``) are collected first.
    Unquoted assignments (``name=value``) are then collected for every
    parameter name that had no quoted assignment at all.

    Args:
        raw: Text that may contain one or more parameter assignments.

    Returns:
        Dict mapping parameter name to the list of bodies in order of
        appearance. The dict is empty when no recognized parameter is found;
        the caller may then treat the entire text as a legacy ip2nets string
        for backwards compatibility.
    """
    found = {}  # type: Dict[str, List[str]]
    # First try quoted parameters
    for m in PARAM_ASSIGN_RE.finditer(raw):
        found.setdefault(m.group('name'), []).append(m.group('body'))

    # Snapshot the names seen in quoted form *before* adding unquoted values.
    # Testing against `found` itself would also discard the second and later
    # unquoted occurrences of a name, because the first one registers it.
    quoted_names = frozenset(found)

    # Also try unquoted parameters (for modprobe files)
    for m in PARAM_ASSIGN_UNQUOTED_RE.finditer(raw):
        name = m.group('name')
        if name in quoted_names:
            # The quoted form takes precedence for this parameter.
            continue
        found.setdefault(name, []).append(m.group('body').strip())
    return found


def parse_modprobe_conf(content: str) -> Dict[str, List[str]]:
    """Collect lnet parameter bodies from the text of a modprobe config file.

    Physical lines ending in a backslash are joined with the following line
    (separated by one space) before being examined. Blank lines and lines
    starting with '#' are ignored. Every remaining line that starts with
    ``options`` and contains ``lnet`` is handed to ``extract_param_values``,
    e.g.:
      options lnet ip2nets="..." routes="..." networks="..."

    Returns a dict mapping parameter name to its bodies in order of appearance
    across the whole file.
    """
    collected = {}  # type: Dict[str, List[str]]
    rows = content.split('\n')
    total = len(rows)
    pos = 0
    while pos < total:
        logical = rows[pos].strip()
        pos += 1
        # Fold continuation lines into one logical line.
        while logical.endswith('\\') and pos < total:
            logical = logical[:-1] + ' ' + rows[pos].strip()
            pos += 1

        if not logical or logical.startswith('#'):
            continue
        if not (logical.startswith('options') and 'lnet' in logical):
            continue

        for name, bodies in extract_param_values(logical).items():
            collected.setdefault(name, []).extend(bodies)

    return collected


def normalize(raw: str) -> str:
    """Join backslash-continued lines and trim the ends of the text.

    Each backslash immediately followed by a newline is replaced by a single
    space. Leading and trailing whitespace is removed; whitespace inside the
    text is otherwise left untouched.
    """
    joined = raw.replace("\\\n", " ")
    return joined.strip()


def split_rules(ip2nets_body: str) -> List[str]:
    """Split a parameter body into trimmed, non-empty rules, in order.

    Rules are separated by ';' or newline. Semicolons are not valid inside the
    bracket grammar, so a plain split is sufficient.
    """
    trimmed = (piece.strip() for piece in re.split(r"[;\n]", ip2nets_body))
    return [piece for piece in trimmed if piece]


def strip_comment(rule: str) -> str:
    """Drop everything from the first '#' onwards (simple heuristic).

    When a '#' is present, the text before it is returned with trailing
    whitespace removed. Without a '#', the rule is returned unchanged.
    """
    head, marker, _ = rule.partition('#')
    return head.rstrip() if marker else rule


def parse_rule(rule_text: str) -> Rule:
    """Parse one ip2nets rule (``<net-spec> <ip-range>+``) into a Rule.

    Raises ValueError when the header cannot be parsed, when no IP pattern
    follows the net-spec, when a token does not look like an IP pattern (with
    a dedicated message if it looks like a second net-spec, which indicates a
    missing ';'), or when the parenthesised interface list is empty.
    """
    header = RULE_HEADER_RE.match(rule_text)
    if header is None:
        raise ValueError(f"Cannot parse rule header: '{rule_text}'")

    net_spec = header.group('network')
    iface_text = header.group('ifaces')

    # Everything after the net-spec is a whitespace-separated list of patterns.
    ip_ranges = header.group('rest').split()
    if not ip_ranges:
        raise ValueError(f"No IP patterns found for network '{net_spec}' in rule: '{rule_text}'")

    for tok in ip_ranges:
        if IP_PATTERN_RE.match(tok):
            continue
        # A bare network name (optionally with an interface list) here almost
        # certainly means two rules were written without a separator.
        if re.match(r'^[A-Za-z][A-Za-z0-9]*(?:\([^)]+\))?$', tok):
            raise ValueError(
                f"Detected what looks like a second network specification '{tok}' in rule: '{rule_text}'. "
                f"Multiple network rules must be separated by semicolons (;)."
            )
        raise ValueError(f"Invalid-looking IP pattern token '{tok}' in rule: '{rule_text}'")

    interfaces: List[str] = []
    if iface_text:
        names = (part.strip() for part in iface_text.split(','))
        interfaces = [name for name in names if name]
        if not interfaces:
            raise ValueError(f"Empty interface list in rule: '{rule_text}'")

    return Rule(net_spec=net_spec, interfaces=interfaces, ip_ranges=ip_ranges)


def convert_ip2nets_body(body: str) -> List[Rule]:
    """Convert a legacy ip2nets body into Rule objects, preserving rule order.

    The body is normalized, split into rules, and each rule has its comment
    removed; rules that are empty after comment removal are skipped. Parse
    errors surface as the ValueError raised by ``parse_rule``.
    """
    cleaned = normalize(body)
    if not cleaned:
        return []
    uncommented = (strip_comment(chunk) for chunk in split_rules(cleaned))
    return [parse_rule(text) for text in uncommented if text]


###############################################
# networks= parsing support
###############################################


class NetworkEntry:
    """One entry of the ``net:`` section: a net type plus optional interfaces."""

    def __init__(self, net_type, interfaces=None):
        # type: (str, Optional[List[str]]) -> None
        self.net_type = net_type
        # Fall back to a fresh list so instances never share one.
        self.interfaces = [] if interfaces is None else interfaces

    def to_yaml_lines(self):
        # type: () -> List[str]
        """Return the YAML lines for this entry (no trailing newlines).

        The ``local NI(s):`` block is emitted only when interfaces are present.
        """
        rendered = [f"-     net type: {self.net_type}"]
        if not self.interfaces:
            return rendered
        rendered += ["      local NI(s):", "      -     interfaces:"]
        rendered += [
            f"                  {index}: {name}"
            for index, name in enumerate(self.interfaces)
        ]
        return rendered


# Regex to parse a single network specification: <nettype>[<number>][(<iface-list>)]
# Examples: tcp, tcp0, tcp0(eth0), o2ib(ib0), tcp0(eth0,eth1)
# Groups: 'nettype' (a letter followed by letters/digits, so it includes any
# trailing number) and 'ifaces' (raw text between the parentheses, or None
# when no interface list is given). Surrounding whitespace is tolerated;
# whitespace inside the parentheses is not.
NETWORK_SPEC_RE = re.compile(
    r"^\s*(?P<nettype>[A-Za-z][A-Za-z0-9]*)(?:\((?P<ifaces>[A-Za-z0-9_,.:/-]+)\))?\s*$"
)


def parse_networks_body(body: str) -> List[NetworkEntry]:
    """Turn a ``networks=`` body into NetworkEntry objects, in order.

    Format: comma-separated list of <nettype>[<number>][(<iface-list>)]
    Examples:
        tcp
        tcp0(eth0)
        tcp0(eth0,eth1)
        tcp0(eth0),o2ib(ib0)

    Raises ValueError for a specification that does not match the expected
    shape or whose interface list contains no names.
    """
    text = normalize(body)
    if not text:
        return []

    # Commas separate specs only at parenthesis depth zero; commas inside
    # "(...)" belong to the interface list.
    specs = []  # type: List[str]
    pending = []  # type: List[str]
    nesting = 0

    def flush():
        # type: () -> None
        piece = ''.join(pending).strip()
        if piece:
            specs.append(piece)
        del pending[:]

    for ch in text:
        if ch == ',' and nesting == 0:
            flush()
            continue
        if ch == '(':
            nesting += 1
        elif ch == ')':
            nesting -= 1
        pending.append(ch)
    flush()  # the last spec has no trailing comma

    entries: List[NetworkEntry] = []
    for net_spec in specs:
        parsed = NETWORK_SPEC_RE.match(net_spec)
        if parsed is None:
            raise ValueError(f"Invalid network specification: '{net_spec}'")

        iface_text = parsed.group('ifaces')
        interfaces: List[str] = []
        if iface_text:
            names = (part.strip() for part in iface_text.split(','))
            interfaces = [name for name in names if name]
            if not interfaces:
                raise ValueError(f"Empty interface list in network spec: '{net_spec}'")

        entries.append(
            NetworkEntry(net_type=parsed.group('nettype'), interfaces=interfaces)
        )

    return entries


###############################################
# routes= parsing support
###############################################


class RouteEntry:
    """One entry of the ``route:`` section: net, gateway, optional hop/priority."""

    def __init__(self, net, gateway, hop=None, priority=None):
        # type: (str, str, Optional[int], Optional[int]) -> None
        self.net, self.gateway = net, gateway
        self.hop, self.priority = hop, priority

    def to_yaml_lines(self):
        # type: () -> List[str]
        """Return the YAML lines for this route (no trailing newlines).

        ``hop`` and ``priority`` are emitted only when they are not None, so a
        value of 0 is still written out.
        """
        rendered = ["  - net: " + self.net]
        rendered.append("    gateway: " + self.gateway)
        for label, value in (("hop", self.hop), ("priority", self.priority)):
            if value is None:
                continue
            rendered.append(f"    {label}: {value}")
        return rendered


# Shape of a hop-count token: "0", or 1-3 digits without a leading zero (0-999).
# Only the shape is checked here; parse_routes_body applies the 1-255 range.
HOP_RE = re.compile(r"^(?:[1-9][0-9]{0,2}|0)$")  # allow 0 though typical hopcount 1-255
# Priority suffix of a legacy gateway token: one or more decimal digits.
PRIORITY_RE = re.compile(r"^[0-9]+$")

# Alternative route format:
#   <network>: { gateway: <gateway>@<exit network>[, hop: <hop>][, priority: <priority>] }
# The gateway value runs up to the first whitespace, comma or closing brace.
# Everything after the first comma (up to the closing brace) is captured as
# 'params', a comma-separated list of "key: value" pairs.
ALT_ROUTE_RE = re.compile(
    r"^\s*(?P<network>[A-Za-z][A-Za-z0-9]*)\s*:\s*\{\s*gateway:\s*(?P<gateway>[^\s,}]+)(?:\s*,\s*(?P<params>.*?))?\s*\}\s*$"
)


def parse_alternative_route(line: str) -> Optional[RouteEntry]:
    """Parse alternative route format used by lustre_routes_config.

    Format: <network>: { gateway: <gateway>@<exit network>[, hop: <hop>][, priority: <priority>] }

    Examples:
        tcp1: { gateway: 10.1.1.2@tcp0, priority: 3 }
        tcp4: { gateway: 10.3.3.4@tcp }
        tcp6: { gateway: 10.3.3.6@tcp, hop: 2, priority: 5 }
        tcp7: { gateway: 10.3.3.[6-12]@tcp, priority: 20, hop: 8 }

    Parameters after the gateway may appear in any order. Entries without a
    ':' and keys other than ``hop`` / ``priority`` are ignored.

    Args:
        line: A single route specification with comments already removed.

    Returns:
        RouteEntry, or None if the line is not in the alternative format.

    Raises:
        ValueError: if ``hop`` is not an integer in 1-255, or ``priority`` is
            not a non-negative integer.
    """
    match = ALT_ROUTE_RE.match(line)
    if not match:
        return None

    hop = None  # type: Optional[int]
    priority = None  # type: Optional[int]

    # 'params' is None when nothing follows the gateway.
    for param in (match.group('params') or '').split(','):
        key, sep, value = param.partition(':')
        if not sep:
            # Empty chunk or a chunk without "key: value" shape.
            continue
        key = key.strip()
        value = value.strip()

        if key == 'hop':
            # Keep conversion and range check apart so each failure gets its
            # own message; the error is never classified by inspecting the
            # text of an exception.
            try:
                hop_val = int(value)
            except ValueError:
                raise ValueError(f"routes: invalid hop value '{value}' in '{line}'") from None
            if not 1 <= hop_val <= 255:
                raise ValueError(f"routes: hopcount {hop_val} out of range 1-255 in '{line}'")
            hop = hop_val
        elif key == 'priority':
            try:
                priority = int(value)
            except ValueError:
                raise ValueError(f"routes: invalid priority value '{value}' in '{line}'") from None
            # The legacy syntax only recognises digit-only priorities
            # (PRIORITY_RE); reject negative values here as well.
            if priority < 0:
                raise ValueError(f"routes: invalid priority value '{value}' in '{line}'")

    return RouteEntry(
        net=match.group('network'),
        gateway=match.group('gateway'),
        hop=hop,
        priority=priority,
    )


def parse_routes_body(body: str) -> List[RouteEntry]:
    """Parse a ``routes=`` body into RouteEntry objects, preserving order.

    Each rule (separated by ';' or newline, comments removed) is first tried
    in the alternative ``<network>: { gateway: ... }`` format and otherwise
    parsed in the legacy format:
        <network> [<hop>] <gateway>[@<exit>][:<priority>]

    Raises ValueError for an incomplete rule, a hop count outside 1-255, a
    missing gateway, or more than one gateway token in a single rule.
    """
    text = normalize(body)
    if not text:
        return []

    routes: List[RouteEntry] = []
    for chunk in split_rules(text):
        line = strip_comment(chunk)
        if not line:
            continue

        # Alternative (brace) format takes precedence.
        brace_route = parse_alternative_route(line)
        if brace_route is not None:
            routes.append(brace_route)
            continue

        # Legacy format.
        fields = line.split()
        if len(fields) < 2:
            raise ValueError(f"routes: incomplete route specification '{line}'")
        net, remainder = fields[0], fields[1:]

        # An optional hop count may follow the network name.
        hop: Optional[int] = None
        if HOP_RE.match(remainder[0]):
            hop_candidate = int(remainder[0])
            if not 1 <= hop_candidate <= 255:
                raise ValueError(f"routes: hopcount {hop_candidate} out of range 1-255 in '{line}'")
            hop = hop_candidate
            remainder = remainder[1:]

        if not remainder:
            raise ValueError(f"routes: missing gateway specs in '{line}'")
        gw_tokens = remainder

        # More than one gateway token most likely means a ';' is missing
        # between two route rules.
        if len(gw_tokens) > 1:
            raise ValueError(
                f"routes: multiple gateway specifications found in single rule '{line}'. "
                f"If you intended multiple routes, separate them with semicolons (;). "
                f"Found gateways: {', '.join(gw_tokens)}"
            )

        gw_tok = gw_tokens[0]
        # Optional ":<digits>" priority suffix. To avoid misreading addresses
        # that contain colons, only the text after the last ':' is considered,
        # and only when the token has an '@' and that text is all digits.
        gateway, priority = gw_tok, None
        if '@' in gw_tok and ':' in gw_tok:
            head, tail = gw_tok.rsplit(':', 1)
            if PRIORITY_RE.match(tail):
                gateway, priority = head, int(tail)

        routes.append(RouteEntry(net=net, gateway=gateway, hop=hop, priority=priority))
    return routes


def emit_yaml(ip_rules: List[Rule], route_entries: List[RouteEntry], network_entries: List[NetworkEntry]) -> str:
    """Assemble the YAML document from the collected entries.

    Sections are written in the order ``net:``, ``ip2nets:``, ``route:``; a
    section is left out entirely when its list is empty. The result ends with
    a newline unless there is nothing to emit, in which case it is "".
    """
    sections = (
        ("net:", network_entries),
        ("ip2nets:", ip_rules),
        ("route:", route_entries),
    )
    out: List[str] = []
    for heading, items in sections:
        if not items:
            continue
        out.append(heading)
        for item in items:
            out.extend(item.to_yaml_lines())
    if not out:
        return ""
    return "\n".join(out) + "\n"


# Characters that cannot start a YAML plain (unquoted) scalar because they are
# indicators: alias/anchor/tag, flow collection delimiters, block scalar
# markers, quotes, directive, reserved characters and comment.
_YAML_UNSAFE_FIRST_CHARS = frozenset("*&![]{},|>'\"%@`#")


def yaml_quote_if_needed(token: str) -> str:
    """Quote tokens that would be misinterpreted by YAML.

    A token is wrapped in double quotes when its first character is a YAML
    indicator that cannot begin a plain scalar. For legacy IP patterns this
    covers a leading '*' (alias indicator) as well as a leading '[', ']' or
    ',' (flow collection syntax), e.g. ``[1-2].168.0.*``. Backslashes and
    double quotes inside a quoted token are escaped.

    Args:
        token: The raw value to be written after ``<key>: ``.

    Returns:
        The token unchanged when it is safe as a plain scalar (including the
        empty string), otherwise the double-quoted, escaped form.
    """
    if token[:1] not in _YAML_UNSAFE_FIRST_CHARS:
        return token
    # Escape backslashes first so the escapes added for '"' are not doubled.
    escaped = token.replace('\\', '\\\\').replace('"', '\\"')
    return '"' + escaped + '"'


def main(argv: List[str]) -> int:
    """Command-line entry point: gather legacy parameters and emit YAML.

    Sources are processed in this order and their results appended: modprobe
    files (-m), ip2nets files (-i), routes files (-r), networks files (-n),
    then the inline argument or, when no inline argument is given and stdin
    is not a terminal, the text read from stdin.

    Args:
        argv: Full argument vector including the program name at index 0.

    Returns:
        0 on success, 1 when no ip2nets/routes/networks data was found, and 2
        on a parse error, an out-of-range --default-hop value, or a failure
        to write the output file. An unreadable input file terminates the
        process via ``sys.exit(2)``.
    """
    parser = argparse.ArgumentParser(
        description="Convert legacy Lustre ip2nets/routes/networks syntax to YAML for lnetctl import"
    )
    parser.add_argument('inline', nargs='?', help='Inline legacy string or options lnet line (optional).')
    parser.add_argument('-i', '--ip2nets', action='append', metavar='FILE', help='File containing ip2nets rules (may be used multiple times).')
    parser.add_argument('-r', '--routes', action='append', metavar='FILE', help='File containing routes rules (may be used multiple times).')
    parser.add_argument('-n', '--networks', action='append', metavar='FILE', help='File containing networks rules (may be used multiple times).')
    parser.add_argument('-m', '--modprobe', action='append', metavar='FILE', help='Modprobe configuration file to parse for lnet parameters (may be used multiple times).')
    parser.add_argument('-o', '--output', metavar='FILE', help='Write YAML output to FILE instead of stdout.')
    parser.add_argument('--default-hop', nargs='?', const=1, type=int, metavar='HOP',
                        help='Assign hop=HOP to any route entries without an explicit hopcount (default 1 if value omitted).')
    args = parser.parse_args(argv[1:])

    # Aggregate sources
    ip_rules: List[Rule] = []
    route_entries: List[RouteEntry] = []
    network_entries: List[NetworkEntry] = []

    def read_file(path: str) -> str:
        """Return the text of *path*; report the error and exit(2) if unreadable."""
        try:
            with open(path, 'r', encoding='utf-8') as fh:
                return fh.read()
        except OSError as e:
            print(f"Error: cannot read {path}: {e}", file=sys.stderr)
            sys.exit(2)

    def absorb(params: Dict[str, List[str]]) -> None:
        """Parse extracted parameter bodies and append them to the aggregates.

        Shared by the modprobe and inline/stdin paths so both handle the three
        parameters identically (ip2nets, then routes, then networks).
        """
        for body in params.get('ip2nets', []):
            ip_rules.extend(convert_ip2nets_body(body))
        for body in params.get('routes', []):
            route_entries.extend(parse_routes_body(body))
        for body in params.get('networks', []):
            network_entries.extend(parse_networks_body(body))

    # Process modprobe configuration files first (if any)
    for f in args.modprobe or []:
        try:
            absorb(parse_modprobe_conf(read_file(f)))
        except ValueError as e:
            print(f"Error in modprobe file {f}: {e}", file=sys.stderr)
            return 2

    # Process file-based ip2nets
    for f in args.ip2nets or []:
        try:
            ip_rules.extend(convert_ip2nets_body(read_file(f)))
        except ValueError as e:
            print(f"Error in ip2nets file {f}: {e}", file=sys.stderr)
            return 2

    # Process file-based routes
    for f in args.routes or []:
        try:
            route_entries.extend(parse_routes_body(read_file(f)))
        except ValueError as e:
            print(f"Error in routes file {f}: {e}", file=sys.stderr)
            return 2

    # Process file-based networks
    for f in args.networks or []:
        try:
            network_entries.extend(parse_networks_body(read_file(f)))
        except ValueError as e:
            print(f"Error in networks file {f}: {e}", file=sys.stderr)
            return 2

    # Inline / stdin content. Without an inline argument, stdin is read
    # whenever it is not a terminal (heuristic: data piped in); this happens
    # regardless of whether file options were given.
    inline_raw = None
    if args.inline is not None:
        inline_raw = args.inline
    elif not sys.stdin.isatty():
        inline_raw = sys.stdin.read()

    if inline_raw:
        try:
            params = extract_param_values(inline_raw)
            absorb(params)
            if not params:  # no name=value found: treat entire text as ip2nets
                ip_rules.extend(convert_ip2nets_body(inline_raw))
        except ValueError as e:
            print(f"Error: {e}", file=sys.stderr)
            return 2

    if not ip_rules and not route_entries and not network_entries:
        print("Error: no ip2nets, routes, or networks data provided (use -m/-i/-r/-n, inline arg, or stdin)", file=sys.stderr)
        return 1

    # Apply the default hop if requested and hop unspecified
    if args.default_hop is not None:
        if not (1 <= args.default_hop <= 255):
            print(f"Error: --default-hop value {args.default_hop} out of range (1-255)", file=sys.stderr)
            return 2
        for e in route_entries:
            if e.hop is None:
                e.hop = args.default_hop

    yaml_out = emit_yaml(ip_rules, route_entries, network_entries)
    if args.output:
        try:
            with open(args.output, 'w', encoding='utf-8') as fh:
                fh.write(yaml_out)
        except OSError as e:
            print(f"Error: cannot write output file {args.output}: {e}", file=sys.stderr)
            return 2
    else:
        sys.stdout.write(yaml_out)
    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))
