diff --git a/nodescraper/connection/inband/inbandremote.py b/nodescraper/connection/inband/inbandremote.py index d5254468..9e2415ed 100644 --- a/nodescraper/connection/inband/inbandremote.py +++ b/nodescraper/connection/inband/inbandremote.py @@ -157,8 +157,8 @@ def run_command( stdin.flush() stdin.channel.shutdown_write() - stdout_str = stdout.read().decode("utf-8") - stderr_str = stderr.read().decode("utf-8") + stdout_str = stdout.read().decode("utf-8", errors="replace") + stderr_str = stderr.read().decode("utf-8", errors="replace") exit_code = stdout.channel.recv_exit_status() except TimeoutError: stderr_str = "Command timed out" diff --git a/nodescraper/plugins/inband/network/network_collector.py b/nodescraper/plugins/inband/network/network_collector.py index ea978fd1..d530bd98 100644 --- a/nodescraper/plugins/inband/network/network_collector.py +++ b/nodescraper/plugins/inband/network/network_collector.py @@ -33,25 +33,11 @@ from .collector_args import NetworkCollectorArgs from .networkdata import ( - BroadcomNicDevice, - BroadcomNicQos, - BroadcomNicQosAppEntry, EthtoolInfo, IpAddress, Neighbor, NetworkDataModel, NetworkInterface, - PensandoNicCard, - PensandoNicDcqcn, - PensandoNicEnvironment, - PensandoNicPcieAts, - PensandoNicPort, - PensandoNicQos, - PensandoNicQosScheduling, - PensandoNicRdmaStatistic, - PensandoNicRdmaStatistics, - PensandoNicVersionFirmware, - PensandoNicVersionHostSoftware, Route, RoutingRule, ) @@ -75,21 +61,6 @@ class NetworkCollector(InBandDataCollector[NetworkDataModel, NetworkCollectorArg CMD_LLDPCLI_NEIGHBOR = "lldpcli show neighbor" CMD_LLDPCTL = "lldpctl" - # Broadcom NIC commands - CMD_NICCLI_LISTDEV = "niccli --list_devices" - CMD_NICCLI_GETQOS_TEMPLATE = "niccli --dev {device_num} qos --ets --show" - - # Pensando NIC commands - CMD_NICCTL_CARD = "nicctl show card" - CMD_NICCTL_DCQCN = "nicctl show dcqcn" - CMD_NICCTL_ENVIRONMENT = "nicctl show environment" - CMD_NICCTL_PCIE_ATS = "nicctl show pcie ats" - CMD_NICCTL_PORT = 
"nicctl show port" - CMD_NICCTL_QOS = "nicctl show qos" - CMD_NICCTL_RDMA_STATISTICS = "nicctl show rdma statistics" - CMD_NICCTL_VERSION_HOST_SOFTWARE = "nicctl show version host-software" - CMD_NICCTL_VERSION_FIRMWARE = "nicctl show version firmware" - def _parse_ip_addr(self, output: str) -> List[NetworkInterface]: """Parse 'ip addr show' output into NetworkInterface objects. @@ -502,920 +473,6 @@ def _parse_ethtool_statistics(self, output: str, interface: str) -> Dict[str, st stats_dict[key.strip()] = value.strip() return stats_dict - def _parse_niccli_listdev(self, output: str) -> List[BroadcomNicDevice]: - """Parse 'niccli --list_devices' output into BroadcomNicDevice objects. - - Args: - output: Raw output from 'niccli --list_devices' command - - Returns: - List of BroadcomNicDevice objects - """ - devices = [] - current_device = None - - for line in output.splitlines(): - line_stripped = line.strip() - if not line_stripped: - continue - - # Check if this is a device header line - match = re.match(r"^(\d+)\s*\)\s*(.+?)(?:\s+\((.+?)\))?$", line_stripped) - if match: - device_num_str = match.group(1) - model = match.group(2).strip() if match.group(2) else None - adapter_port = match.group(3).strip() if match.group(3) else None - - try: - device_num = int(device_num_str) - except ValueError: - continue - - current_device = BroadcomNicDevice( - device_num=device_num, - model=model, - adapter_port=adapter_port, - ) - devices.append(current_device) - - # Check for Device Interface Name line - elif "Device Interface Name" in line and current_device: - parts = line_stripped.split(":") - if len(parts) >= 2: - current_device.interface_name = parts[1].strip() - - # Check for MAC Address line - elif "MAC Address" in line and current_device: - parts = line_stripped.split(":") - if len(parts) >= 2: - # MAC address has colons, so rejoin the parts after first split - mac = ":".join(parts[1:]).strip() - current_device.mac_address = mac - - # Check for PCI Address line - elif 
"PCI Address" in line and current_device: - parts = line_stripped.split(":") - if len(parts) >= 2: - # PCI address also has colons, rejoin - pci = ":".join(parts[1:]).strip() - current_device.pci_address = pci - - return devices - - def _parse_nicctl_card(self, output: str) -> List[PensandoNicCard]: - """Parse 'nicctl show card' output into PensandoNicCard objects. - - Args: - output: Raw output from 'nicctl show card' command - - Returns: - List of PensandoNicCard objects - """ - cards = [] - - # Skip header lines and separator lines - in_data_section = False - - for line in output.splitlines(): - line_stripped = line.strip() - if not line_stripped: - continue - - # Skip header line (starts with "Id") - if line_stripped.startswith("Id"): - in_data_section = True - continue - - # Skip separator lines (mostly dashes) - if re.match(r"^-+$", line_stripped): - continue - - # Parse data lines after header - if in_data_section: - # Split by whitespace - parts = line_stripped.split() - - # Expected format: Id PCIe_BDF ASIC F/W_partition Serial_number - if len(parts) >= 2: - card = PensandoNicCard( - id=parts[0], - pcie_bdf=parts[1], - asic=parts[2] if len(parts) > 2 else None, - fw_partition=parts[3] if len(parts) > 3 else None, - serial_number=parts[4] if len(parts) > 4 else None, - ) - cards.append(card) - - return cards - - def _parse_nicctl_dcqcn(self, output: str) -> List[PensandoNicDcqcn]: - """Parse 'nicctl show dcqcn' output into PensandoNicDcqcn objects. 
- - Args: - output: Raw output from 'nicctl show dcqcn' command - - Returns: - List of PensandoNicDcqcn objects - """ - dcqcn_entries = [] - current_entry = None - - for line in output.splitlines(): - line_stripped = line.strip() - if not line_stripped: - continue - - # Check for NIC line - if line_stripped.startswith("NIC :"): - # Save previous entry if exists - if current_entry: - dcqcn_entries.append(current_entry) - - # Parse NIC ID and PCIe BDF - # Format: "NIC : ()" - match = re.match( - r"NIC\s*:\s*([a-f0-9\-]+)\s*\(([0-9a-f:\.]+)\)", line_stripped, re.IGNORECASE - ) - if match: - nic_id = match.group(1) - pcie_bdf = match.group(2) - current_entry = PensandoNicDcqcn( - nic_id=nic_id, - pcie_bdf=pcie_bdf, - ) - continue - - # Skip separator lines (dashes or asterisks) - if re.match(r"^[-*]+$", line_stripped): - continue - - # Parse fields within current entry - if current_entry and ":" in line_stripped: - parts = line_stripped.split(":", 1) - if len(parts) == 2: - key = parts[0].strip() - value = parts[1].strip() - - if key == "Lif id": - current_entry.lif_id = value - elif key == "ROCE device": - current_entry.roce_device = value - elif key == "DCQCN profile id": - current_entry.dcqcn_profile_id = value - elif key == "Status": - current_entry.status = value - - # Add the last entry if exists - if current_entry: - dcqcn_entries.append(current_entry) - - return dcqcn_entries - - def _parse_nicctl_environment(self, output: str) -> List[PensandoNicEnvironment]: - """Parse 'nicctl show environment' output into PensandoNicEnvironment objects. 
- - Args: - output: Raw output from 'nicctl show environment' command - - Returns: - List of PensandoNicEnvironment objects - """ - environment_entries = [] - current_entry = None - - for line in output.splitlines(): - line_stripped = line.strip() - if not line_stripped: - continue - - # Check for NIC line - if line_stripped.startswith("NIC :"): - # Save previous entry if exists - if current_entry: - environment_entries.append(current_entry) - - # Parse NIC ID and PCIe BDF - # Format: "NIC : ()" - match = re.match( - r"NIC\s*:\s*([a-f0-9\-]+)\s*\(([0-9a-f:\.]+)\)", line_stripped, re.IGNORECASE - ) - if match: - nic_id = match.group(1) - pcie_bdf = match.group(2) - current_entry = PensandoNicEnvironment( - nic_id=nic_id, - pcie_bdf=pcie_bdf, - ) - continue - - # Skip separator lines (dashes) - if re.match(r"^-+$", line_stripped): - continue - - # Skip section headers (Power(W):, Temperature(C):, etc.) - if line_stripped.endswith("):"): - continue - - # Parse fields within current entry - if current_entry and ":" in line_stripped: - parts = line_stripped.split(":", 1) - if len(parts) == 2: - key = parts[0].strip() - value_str = parts[1].strip() - - # Try to parse the value as float - try: - value = float(value_str) - except ValueError: - continue - - # Map keys to fields - if key == "Total power drawn (pin)" or key == "Total power drawn": - current_entry.total_power_drawn = value - elif key == "Core power (pout1)" or key == "Core power": - current_entry.core_power = value - elif key == "ARM power (pout2)" or key == "ARM power": - current_entry.arm_power = value - elif key == "Local board temperature": - current_entry.local_board_temperature = value - elif key == "Die temperature": - current_entry.die_temperature = value - elif key == "Input voltage": - current_entry.input_voltage = value - elif key == "Core voltage": - current_entry.core_voltage = value - elif key == "Core frequency": - current_entry.core_frequency = value - elif key == "CPU frequency": - 
current_entry.cpu_frequency = value - elif key == "P4 stage frequency": - current_entry.p4_stage_frequency = value - - # Add the last entry if exists - if current_entry: - environment_entries.append(current_entry) - - return environment_entries - - def _parse_nicctl_pcie_ats(self, output: str) -> List[PensandoNicPcieAts]: - """Parse 'nicctl show pcie ats' output into PensandoNicPcieAts objects. - - Args: - output: Raw output from 'nicctl show pcie ats' command - - Returns: - List of PensandoNicPcieAts objects - """ - pcie_ats_entries = [] - - for line in output.splitlines(): - line_stripped = line.strip() - if not line_stripped: - continue - - # Parse line format: "NIC : () : " - if line_stripped.startswith("NIC :"): - match = re.match( - r"NIC\s*:\s*([a-f0-9\-]+)\s*\(([0-9a-f:\.]+)\)\s*:\s*(\w+)", - line_stripped, - re.IGNORECASE, - ) - if match: - nic_id = match.group(1) - pcie_bdf = match.group(2) - status = match.group(3) - entry = PensandoNicPcieAts( - nic_id=nic_id, - pcie_bdf=pcie_bdf, - status=status, - ) - pcie_ats_entries.append(entry) - - return pcie_ats_entries - - def _parse_nicctl_port(self, output: str) -> List[PensandoNicPort]: - """Parse 'nicctl show port' output into PensandoNicPort objects. 
- - Args: - output: Raw output from 'nicctl show port' command - - Returns: - List of PensandoNicPort objects - """ - port_entries = [] - current_entry = None - current_section = None # 'spec' or 'status' - current_nic_id = None - current_pcie_bdf = None - - for line in output.splitlines(): - line_stripped = line.strip() - if not line_stripped: - continue - - # Check for NIC line - if line_stripped.startswith("NIC") and ":" in line_stripped: - # Save previous entry if exists - if current_entry: - port_entries.append(current_entry) - current_entry = None - - # Parse NIC ID and PCIe BDF - match = re.match( - r"NIC\s*:\s*([a-f0-9\-]+)\s*\(([0-9a-f:\.]+)\)", line_stripped, re.IGNORECASE - ) - if match: - current_nic_id = match.group(1) - current_pcie_bdf = match.group(2) - continue - - # Check for Port line - if ( - line_stripped.startswith("Port") - and ":" in line_stripped - and current_nic_id - and current_pcie_bdf - ): - # Save previous entry if exists - if current_entry: - port_entries.append(current_entry) - - # Parse Port ID and Port name - match = re.match( - r"Port\s*:\s*([a-f0-9\-]+)\s*\(([^\)]+)\)", line_stripped, re.IGNORECASE - ) - if match: - port_id = match.group(1) - port_name = match.group(2) - current_entry = PensandoNicPort( - nic_id=current_nic_id, - pcie_bdf=current_pcie_bdf, - port_id=port_id, - port_name=port_name, - ) - continue - - # Skip separator lines (dashes) - if re.match(r"^-+$", line_stripped): - continue - - # Check for section headers - if line_stripped.endswith(":"): - if line_stripped == "Spec:": - current_section = "spec" - elif line_stripped == "Status:": - current_section = "status" - continue - - # Parse fields within current entry and section - if current_entry and current_section and ":" in line_stripped: - parts = line_stripped.split(":", 1) - if len(parts) == 2: - key = parts[0].strip() - value = parts[1].strip() - - if current_section == "spec": - if key == "Ifindex": - current_entry.spec_ifindex = value - elif key == 
"Type": - current_entry.spec_type = value - elif key == "speed": - current_entry.spec_speed = value - elif key == "Admin state": - current_entry.spec_admin_state = value - elif key == "FEC type": - current_entry.spec_fec_type = value - elif key == "Pause type": - current_entry.spec_pause_type = value - elif key == "Number of lanes": - try: - current_entry.spec_num_lanes = int(value) - except ValueError: - pass - elif key == "MTU": - try: - current_entry.spec_mtu = int(value) - except ValueError: - pass - elif key == "TX pause": - current_entry.spec_tx_pause = value - elif key == "RX pause": - current_entry.spec_rx_pause = value - elif key == "Auto negotiation": - current_entry.spec_auto_negotiation = value - elif current_section == "status": - if key == "Physical port": - try: - current_entry.status_physical_port = int(value) - except ValueError: - pass - elif key == "Operational status": - current_entry.status_operational_status = value - elif key == "Link FSM state": - current_entry.status_link_fsm_state = value - elif key == "FEC type": - current_entry.status_fec_type = value - elif key == "Cable type": - current_entry.status_cable_type = value - elif key == "Number of lanes": - try: - current_entry.status_num_lanes = int(value) - except ValueError: - pass - elif key == "speed": - current_entry.status_speed = value - elif key == "Auto negotiation": - current_entry.status_auto_negotiation = value - elif key == "MAC ID": - try: - current_entry.status_mac_id = int(value) - except ValueError: - pass - elif key == "MAC channel": - try: - current_entry.status_mac_channel = int(value) - except ValueError: - pass - elif key == "MAC address": - current_entry.status_mac_address = value - elif key == "Transceiver type": - current_entry.status_transceiver_type = value - elif key == "Transceiver state": - current_entry.status_transceiver_state = value - elif key == "Transceiver PID": - current_entry.status_transceiver_pid = value - - # Add the last entry if exists - if 
current_entry: - port_entries.append(current_entry) - - return port_entries - - def _parse_nicctl_qos(self, output: str) -> List[PensandoNicQos]: - """Parse 'nicctl show qos' output into PensandoNicQos objects. - - Args: - output: Raw output from 'nicctl show qos' command - - Returns: - List of PensandoNicQos objects - """ - qos_entries = [] - current_entry = None - current_nic_id = None - current_pcie_bdf = None - in_scheduling_table = False - - for line in output.splitlines(): - line_stripped = line.strip() - if not line_stripped: - continue - - # Check for NIC line: "NIC : 42424650-4c32-3533-3330-323934000000 (0000:06:00.0)" - if line_stripped.startswith("NIC") and ":" in line_stripped: - # Save previous entry if exists - if current_entry: - qos_entries.append(current_entry) - current_entry = None - - # Parse NIC ID and PCIe BDF - match = re.match( - r"NIC\s*:\s*([a-f0-9\-]+)\s*\(([0-9a-f:\.]+)\)", line_stripped, re.IGNORECASE - ) - if match: - current_nic_id = match.group(1) - current_pcie_bdf = match.group(2) - in_scheduling_table = False - continue - - # Check for Port line: "Port : 0490814a-6c40-4242-4242-000011010000" - if ( - line_stripped.startswith("Port") - and ":" in line_stripped - and current_nic_id - and current_pcie_bdf - ): - # Save previous entry if exists - if current_entry: - qos_entries.append(current_entry) - - # Parse Port ID - parts = line_stripped.split(":") - if len(parts) >= 2: - port_id = parts[1].strip() - current_entry = PensandoNicQos( - nic_id=current_nic_id, - pcie_bdf=current_pcie_bdf, - port_id=port_id, - ) - in_scheduling_table = False - continue - - # Skip separator lines (dashes) but don't reset scheduling table flag - if re.match(r"^-+$", line_stripped): - continue - - # Check for section headers - if current_entry: - # Classification type - if "Classification type" in line: - parts = line_stripped.split(":") - if len(parts) >= 2: - current_entry.classification_type = parts[1].strip() - - # DSCP bitmap - elif "DSCP bitmap" in 
line and "==>" in line: - parts = line_stripped.split("==>") - if len(parts) >= 2: - bitmap_part = parts[0].split(":") - if len(bitmap_part) >= 2: - current_entry.dscp_bitmap = bitmap_part[1].strip() - priority_part = parts[1].split(":") - if len(priority_part) >= 2: - try: - current_entry.dscp_priority = int(priority_part[1].strip()) - except ValueError: - pass - - # DSCP range - elif line_stripped.startswith("DSCP") and "==>" in line and "bitmap" not in line: - parts = line_stripped.split("==>") - if len(parts) >= 2: - dscp_part = parts[0].split(":") - if len(dscp_part) >= 2: - current_entry.dscp_range = dscp_part[1].strip() - priority_part = parts[1].split(":") - if len(priority_part) >= 2: - try: - current_entry.dscp_priority = int(priority_part[1].strip()) - except ValueError: - pass - - # PFC priority bitmap - elif "PFC priority bitmap" in line: - parts = line_stripped.split(":") - if len(parts) >= 2: - current_entry.pfc_priority_bitmap = parts[1].strip() - - # PFC no-drop priorities - elif "PFC no-drop priorities" in line: - parts = line_stripped.split(":") - if len(parts) >= 2: - current_entry.pfc_no_drop_priorities = parts[1].strip() - - # Scheduling table header - elif "Priority" in line and "Scheduling" in line: - in_scheduling_table = True - continue - - # Parse scheduling table entries - elif in_scheduling_table and not line_stripped.startswith("---"): - # Try to parse scheduling entry - # Format: "0 DWRR 0 N/A" - parts = line_stripped.split() - if len(parts) >= 2: - try: - priority = int(parts[0]) - scheduling_type = parts[1] if len(parts) > 1 else None - bandwidth = None - rate_limit = None - if len(parts) > 2: - try: - bandwidth = int(parts[2]) - except ValueError: - pass - if len(parts) > 3: - rate_limit = parts[3] - - sched_entry = PensandoNicQosScheduling( - priority=priority, - scheduling_type=scheduling_type, - bandwidth=bandwidth, - rate_limit=rate_limit, - ) - current_entry.scheduling.append(sched_entry) - except (ValueError, IndexError): - 
pass - - # Add the last entry if exists - if current_entry: - qos_entries.append(current_entry) - - return qos_entries - - def _parse_nicctl_rdma_statistics(self, output: str) -> List[PensandoNicRdmaStatistics]: - """Parse 'nicctl show rdma statistics' output into PensandoNicRdmaStatistics objects. - - Args: - output: Raw output from 'nicctl show rdma statistics' command - - Returns: - List of PensandoNicRdmaStatistics objects - """ - rdma_stats_entries = [] - current_entry = None - in_statistics_table = False - - for line in output.splitlines(): - line_stripped = line.strip() - if not line_stripped: - continue - - # Check for NIC line: "NIC : 42424650-4c32-3533-3330-323934000000 (0000:06:00.0)" - if line_stripped.startswith("NIC") and ":" in line_stripped: - # Save previous entry if exists - if current_entry: - rdma_stats_entries.append(current_entry) - - # Parse NIC ID and PCIe BDF - match = re.match( - r"NIC\s*:\s*([a-f0-9\-]+)\s*\(([0-9a-f:\.]+)\)", line_stripped, re.IGNORECASE - ) - if match: - nic_id = match.group(1) - pcie_bdf = match.group(2) - current_entry = PensandoNicRdmaStatistics( - nic_id=nic_id, - pcie_bdf=pcie_bdf, - ) - in_statistics_table = False - continue - - # Skip separator lines (dashes) - if re.match(r"^-+$", line_stripped): - continue - - # Check for table header - if "Name" in line and "Count" in line: - in_statistics_table = True - continue - - # Parse statistics entries - if current_entry and in_statistics_table: - # The format is: "Queue pair create 1" - # We need to split from the right to get the count - parts = line_stripped.rsplit(None, 1) # Split from right, max 1 split - if len(parts) == 2: - name = parts[0].strip() - count_str = parts[1].strip() - try: - count = int(count_str) - stat_entry = PensandoNicRdmaStatistic( - name=name, - count=count, - ) - current_entry.statistics.append(stat_entry) - except ValueError: - pass - - # Add the last entry if exists - if current_entry: - rdma_stats_entries.append(current_entry) - - return 
rdma_stats_entries - - def _parse_nicctl_version_host_software( - self, output: str - ) -> Optional[PensandoNicVersionHostSoftware]: - """Parse 'nicctl show version host-software' output into PensandoNicVersionHostSoftware object. - - Args: - output: Raw output from 'nicctl show version host-software' command - - Returns: - PensandoNicVersionHostSoftware object or None if no data found - """ - version_info = PensandoNicVersionHostSoftware() - found_data = False - - for line in output.splitlines(): - line_stripped = line.strip() - if not line_stripped or ":" not in line_stripped: - continue - - # Split on the first colon to get key and value - parts = line_stripped.split(":", 1) - if len(parts) != 2: - continue - - key = parts[0].strip().lower() - value = parts[1].strip() - - if "nicctl" in key: - version_info.nicctl = value - found_data = True - elif "ipc driver" in key or "ipc_driver" in key: - version_info.ipc_driver = value - found_data = True - elif "ionic driver" in key or "ionic_driver" in key: - version_info.ionic_driver = value - found_data = True - - return version_info if found_data else None - - def _parse_nicctl_version_firmware(self, output: str) -> List[PensandoNicVersionFirmware]: - """Parse 'nicctl show version firmware' output into PensandoNicVersionFirmware objects. 
- - Args: - output: Raw output from 'nicctl show version firmware' command - - Returns: - List of PensandoNicVersionFirmware objects - """ - firmware_entries = [] - current_entry = None - - for line in output.splitlines(): - line_stripped = line.strip() - if not line_stripped: - continue - - # Skip separator lines (dashes) - if re.match(r"^-+$", line_stripped): - # Save previous entry when we hit a separator - if current_entry: - firmware_entries.append(current_entry) - current_entry = None - continue - - # Check for NIC line - if line_stripped.startswith("NIC") and ":" in line_stripped: - # Save previous entry if exists - if current_entry: - firmware_entries.append(current_entry) - - # Parse NIC ID and PCIe BDF - match = re.match( - r"NIC\s*:\s*([a-f0-9\-]+)\s*\(([0-9a-f:\.]+)\)", line_stripped, re.IGNORECASE - ) - if match: - nic_id = match.group(1) - pcie_bdf = match.group(2) - current_entry = PensandoNicVersionFirmware( - nic_id=nic_id, - pcie_bdf=pcie_bdf, - ) - continue - - # Parse version fields - if current_entry and ":" in line_stripped: - parts = line_stripped.split(":", 1) - if len(parts) == 2: - key = parts[0].strip().lower() - value = parts[1].strip() - - if "cpld" in key: - current_entry.cpld = value - elif "boot0" in key: - current_entry.boot0 = value - elif "uboot-a" in key or "uboot_a" in key: - current_entry.uboot_a = value - elif "firmware-a" in key or "firmware_a" in key: - current_entry.firmware_a = value - elif ( - "device config-a" in key - or "device_config_a" in key - or "device config" in key - ): - current_entry.device_config_a = value - - # Add the last entry if exists - if current_entry: - firmware_entries.append(current_entry) - - return firmware_entries - - def _parse_niccli_qos(self, device_num: int, output: str) -> BroadcomNicQos: - """Parse 'niccli --dev X qos --ets --show' output into BroadcomNicQos object. 
- - Args: - device_num: Device number - output: Raw output from 'niccli --dev X qos --ets --show' command - - Returns: - BroadcomNicQos object with parsed data - """ - qos_info = BroadcomNicQos(device_num=device_num, raw_output=output) - - current_app_entry = None - - for line in output.splitlines(): - line_stripped = line.strip() - if not line_stripped: - continue - - # Parse PRIO_MAP: "PRIO_MAP: 0:0 1:0 2:0 3:1 4:0 5:0 6:0 7:2" - if "PRIO_MAP:" in line: - parts = line.split("PRIO_MAP:") - if len(parts) >= 2: - prio_entries = parts[1].strip().split() - for entry in prio_entries: - if ":" in entry: - prio, tc = entry.split(":") - try: - qos_info.prio_map[int(prio)] = int(tc) - except ValueError: - pass - - # Parse TC Bandwidth: "TC Bandwidth: 50% 50% 0%" - elif "TC Bandwidth:" in line: - parts = line.split("TC Bandwidth:") - if len(parts) >= 2: - bandwidth_entries = parts[1].strip().split() - for bw in bandwidth_entries: - bw_clean = bw.rstrip("%") - try: - qos_info.tc_bandwidth.append(int(bw_clean)) - except ValueError: - pass - - # Parse TSA_MAP: "TSA_MAP: 0:ets 1:ets 2:strict" - elif "TSA_MAP:" in line: - parts = line.split("TSA_MAP:") - if len(parts) >= 2: - tsa_entries = parts[1].strip().split() - for entry in tsa_entries: - if ":" in entry: - tc, tsa = entry.split(":", 1) - try: - qos_info.tsa_map[int(tc)] = tsa - except ValueError: - pass - - # Parse PFC enabled: "PFC enabled: 3" - elif "PFC enabled:" in line: - parts = line.split("PFC enabled:") - if len(parts) >= 2: - try: - qos_info.pfc_enabled = int(parts[1].strip()) - except ValueError: - pass - - # Parse APP entries - detect start of new APP entry - elif line_stripped.startswith("APP#"): - # Save previous entry if exists - if current_app_entry: - qos_info.app_entries.append(current_app_entry) - current_app_entry = BroadcomNicQosAppEntry() - - # Parse Priority within APP entry - elif "Priority:" in line and current_app_entry is not None: - parts = line.split("Priority:") - if len(parts) >= 2: - try: - 
current_app_entry.priority = int(parts[1].strip()) - except ValueError: - pass - - # Parse Sel within APP entry - elif "Sel:" in line and current_app_entry is not None: - parts = line.split("Sel:") - if len(parts) >= 2: - try: - current_app_entry.sel = int(parts[1].strip()) - except ValueError: - pass - - # Parse DSCP within APP entry - elif "DSCP:" in line and current_app_entry is not None: - parts = line.split("DSCP:") - if len(parts) >= 2: - try: - current_app_entry.dscp = int(parts[1].strip()) - except ValueError: - pass - - # Parse protocol and port (e.g., "UDP or DCCP: 4791") - elif ( - "UDP" in line or "TCP" in line or "DCCP" in line - ) and current_app_entry is not None: - if ":" in line: - parts = line.split(":") - if len(parts) >= 2: - current_app_entry.protocol = parts[0].strip() - try: - current_app_entry.port = int(parts[1].strip()) - except ValueError: - pass - - # Parse TC Rate Limit: "TC Rate Limit: 100% 100% 100% 0% 0% 0% 0% 0%" - elif "TC Rate Limit:" in line: - parts = line.split("TC Rate Limit:") - if len(parts) >= 2: - rate_entries = parts[1].strip().split() - for rate in rate_entries: - rate_clean = rate.rstrip("%") - try: - qos_info.tc_rate_limit.append(int(rate_clean)) - except ValueError: - pass - - # Add the last APP entry if exists - if current_app_entry: - qos_info.app_entries.append(current_app_entry) - - return qos_info - def _collect_ethtool_info(self, interfaces: List[NetworkInterface]) -> Dict[str, EthtoolInfo]: """Collect ethtool information for all network interfaces. @@ -1496,230 +553,6 @@ def _collect_lldp_info(self) -> None: priority=EventPriority.INFO, ) - def _collect_broadcom_nic_info( - self, - ) -> Tuple[List[BroadcomNicDevice], Dict[int, BroadcomNicQos]]: - """Collect Broadcom NIC information using niccli commands. 
- - Returns: - Tuple of (list of BroadcomNicDevice, dict mapping device number to BroadcomNicQos) - """ - devices = [] - qos_data = {} - - # First, list devices - res_listdev = self._run_sut_cmd(self.CMD_NICCLI_LISTDEV, sudo=True) - if res_listdev.exit_code == 0: - # Parse device list - devices = self._parse_niccli_listdev(res_listdev.stdout) - self._log_event( - category=EventCategory.NETWORK, - description=f"Collected Broadcom NIC device list: {len(devices)} devices", - priority=EventPriority.INFO, - ) - - # Collect QoS info for each device - for device in devices: - cmd = self.CMD_NICCLI_GETQOS_TEMPLATE.format(device_num=device.device_num) - res_qos = self._run_sut_cmd(cmd, sudo=True) - if res_qos.exit_code == 0: - qos_info = self._parse_niccli_qos(device.device_num, res_qos.stdout) - qos_data[device.device_num] = qos_info - self._log_event( - category=EventCategory.NETWORK, - description=f"Collected Broadcom NIC QoS info for device {device.device_num}", - priority=EventPriority.INFO, - ) - else: - self._log_event( - category=EventCategory.NETWORK, - description=f"Failed to collect QoS info for device {device.device_num}", - data={"command": res_qos.command, "exit_code": res_qos.exit_code}, - priority=EventPriority.WARNING, - ) - - if qos_data: - self._log_event( - category=EventCategory.NETWORK, - description=f"Collected Broadcom NIC QoS info for {len(qos_data)} devices", - priority=EventPriority.INFO, - ) - else: - self._log_event( - category=EventCategory.NETWORK, - description="Broadcom NIC collection failed or niccli not available", - data={"command": res_listdev.command, "exit_code": res_listdev.exit_code}, - priority=EventPriority.INFO, - ) - - return devices, qos_data - - def _collect_pensando_nic_info( - self, - ) -> Tuple[ - List[PensandoNicCard], - List[PensandoNicDcqcn], - List[PensandoNicEnvironment], - List[PensandoNicPcieAts], - List[PensandoNicPort], - List[PensandoNicQos], - List[PensandoNicRdmaStatistics], - 
Optional[PensandoNicVersionHostSoftware], - List[PensandoNicVersionFirmware], - List[str], - ]: - """Collect Pensando NIC information using nicctl commands. - - Returns: - Tuple of (list of PensandoNicCard, list of PensandoNicDcqcn, - list of PensandoNicEnvironment, list of PensandoNicPcieAts, - list of PensandoNicPort, list of PensandoNicQos, - list of PensandoNicRdmaStatistics, - PensandoNicVersionHostSoftware object, - list of PensandoNicVersionFirmware, - list of uncollected command names) - """ - cards = [] - dcqcn_entries = [] - environment_entries = [] - pcie_ats_entries = [] - port_entries = [] - qos_entries = [] - rdma_statistics_entries = [] - version_host_software = None - version_firmware_entries = [] - - # Track which commands failed - uncollected_commands = [] - - # Parse nicctl show card output - res_card = self._run_sut_cmd(self.CMD_NICCTL_CARD, sudo=True) - if res_card.exit_code == 0: - cards = self._parse_nicctl_card(res_card.stdout) - self._log_event( - category=EventCategory.NETWORK, - description=f"Collected Pensando NIC card list: {len(cards)} cards", - priority=EventPriority.INFO, - ) - else: - uncollected_commands.append(self.CMD_NICCTL_CARD) - - # Parse nicctl show dcqcn output - res_dcqcn = self._run_sut_cmd(self.CMD_NICCTL_DCQCN, sudo=True) - if res_dcqcn.exit_code == 0: - dcqcn_entries = self._parse_nicctl_dcqcn(res_dcqcn.stdout) - self._log_event( - category=EventCategory.NETWORK, - description=f"Collected Pensando NIC DCQCN info: {len(dcqcn_entries)} entries", - priority=EventPriority.INFO, - ) - else: - uncollected_commands.append(self.CMD_NICCTL_DCQCN) - - # Parse nicctl show environment output - res_environment = self._run_sut_cmd(self.CMD_NICCTL_ENVIRONMENT, sudo=True) - if res_environment.exit_code == 0: - environment_entries = self._parse_nicctl_environment(res_environment.stdout) - self._log_event( - category=EventCategory.NETWORK, - description=f"Collected Pensando NIC environment info: {len(environment_entries)} entries", - 
priority=EventPriority.INFO, - ) - else: - uncollected_commands.append(self.CMD_NICCTL_ENVIRONMENT) - - # Parse nicctl show pcie ats output - res_pcie_ats = self._run_sut_cmd(self.CMD_NICCTL_PCIE_ATS, sudo=True) - if res_pcie_ats.exit_code == 0: - pcie_ats_entries = self._parse_nicctl_pcie_ats(res_pcie_ats.stdout) - self._log_event( - category=EventCategory.NETWORK, - description=f"Collected Pensando NIC PCIe ATS info: {len(pcie_ats_entries)} entries", - priority=EventPriority.INFO, - ) - else: - uncollected_commands.append(self.CMD_NICCTL_PCIE_ATS) - - # Parse nicctl show port output - res_port = self._run_sut_cmd(self.CMD_NICCTL_PORT, sudo=True) - if res_port.exit_code == 0: - port_entries = self._parse_nicctl_port(res_port.stdout) - self._log_event( - category=EventCategory.NETWORK, - description=f"Collected Pensando NIC port info: {len(port_entries)} ports", - priority=EventPriority.INFO, - ) - else: - uncollected_commands.append(self.CMD_NICCTL_PORT) - - # Parse nicctl show qos output - res_qos = self._run_sut_cmd(self.CMD_NICCTL_QOS, sudo=True) - if res_qos.exit_code == 0: - qos_entries = self._parse_nicctl_qos(res_qos.stdout) - self._log_event( - category=EventCategory.NETWORK, - description=f"Collected Pensando NIC QoS info: {len(qos_entries)} entries", - priority=EventPriority.INFO, - ) - else: - uncollected_commands.append(self.CMD_NICCTL_QOS) - - # Parse nicctl show rdma statistics output - res_rdma_stats = self._run_sut_cmd(self.CMD_NICCTL_RDMA_STATISTICS, sudo=True) - if res_rdma_stats.exit_code == 0: - rdma_statistics_entries = self._parse_nicctl_rdma_statistics(res_rdma_stats.stdout) - self._log_event( - category=EventCategory.NETWORK, - description=f"Collected Pensando NIC RDMA statistics: {len(rdma_statistics_entries)} entries", - priority=EventPriority.INFO, - ) - else: - uncollected_commands.append(self.CMD_NICCTL_RDMA_STATISTICS) - - # Parse nicctl show version host-software output - res_version_host = 
self._run_sut_cmd(self.CMD_NICCTL_VERSION_HOST_SOFTWARE, sudo=True) - if res_version_host.exit_code == 0: - version_host_software = self._parse_nicctl_version_host_software( - res_version_host.stdout - ) - if version_host_software: - self._log_event( - category=EventCategory.NETWORK, - description="Collected Pensando NIC host software version", - priority=EventPriority.INFO, - ) - else: - uncollected_commands.append(self.CMD_NICCTL_VERSION_HOST_SOFTWARE) - else: - uncollected_commands.append(self.CMD_NICCTL_VERSION_HOST_SOFTWARE) - - # Parse nicctl show version firmware output - res_version_firmware = self._run_sut_cmd(self.CMD_NICCTL_VERSION_FIRMWARE, sudo=True) - if res_version_firmware.exit_code == 0: - version_firmware_entries = self._parse_nicctl_version_firmware( - res_version_firmware.stdout - ) - self._log_event( - category=EventCategory.NETWORK, - description=f"Collected Pensando NIC firmware versions: {len(version_firmware_entries)} entries", - priority=EventPriority.INFO, - ) - else: - uncollected_commands.append(self.CMD_NICCTL_VERSION_FIRMWARE) - - return ( - cards, - dcqcn_entries, - environment_entries, - pcie_ats_entries, - port_entries, - qos_entries, - rdma_statistics_entries, - version_host_software, - version_firmware_entries, - uncollected_commands, - ) - def _check_network_connectivity(self, cmd: str, url: str) -> bool: """Check network connectivity using specified command. 
@@ -1784,17 +617,6 @@ def collect_data( rules = [] neighbors = [] ethtool_data = {} - broadcom_devices: List[BroadcomNicDevice] = [] - broadcom_qos_data: Dict[int, BroadcomNicQos] = {} - pensando_cards: List[PensandoNicCard] = [] - pensando_dcqcn: List[PensandoNicDcqcn] = [] - pensando_environment: List[PensandoNicEnvironment] = [] - pensando_pcie_ats: List[PensandoNicPcieAts] = [] - pensando_ports: List[PensandoNicPort] = [] - pensando_qos: List[PensandoNicQos] = [] - pensando_rdma_statistics: List[PensandoNicRdmaStatistics] = [] - pensando_version_host_software: Optional[PensandoNicVersionHostSoftware] = None - pensando_version_firmware: List[PensandoNicVersionFirmware] = [] network_accessible: Optional[bool] = None # Check network connectivity if URL is provided @@ -1894,34 +716,7 @@ def collect_data( # Collect LLDP information self._collect_lldp_info() - # Collect Broadcom NIC information - broadcom_devices, broadcom_qos_data = self._collect_broadcom_nic_info() - - # Collect Pensando NIC information - ( - pensando_cards, - pensando_dcqcn, - pensando_environment, - pensando_pcie_ats, - pensando_ports, - pensando_qos, - pensando_rdma_statistics, - pensando_version_host_software, - pensando_version_firmware, - uncollected_commands, - ) = self._collect_pensando_nic_info() - - # Log summary of uncollected commands or success - if uncollected_commands: - self.result.message = "Network data collection failed" - self._log_event( - category=EventCategory.NETWORK, - description=f"Failed to collect {len(uncollected_commands)} nicctl commands: {', '.join(uncollected_commands)}", - priority=EventPriority.WARNING, - ) - - else: - self.result.message = "Network data collected successfully" + self.result.message = "Network data collected successfully" network_data = NetworkDataModel( interfaces=interfaces, @@ -1929,17 +724,6 @@ def collect_data( rules=rules, neighbors=neighbors, ethtool_info=ethtool_data, - broadcom_nic_devices=broadcom_devices, - 
broadcom_nic_qos=broadcom_qos_data, - pensando_nic_cards=pensando_cards, - pensando_nic_dcqcn=pensando_dcqcn, - pensando_nic_environment=pensando_environment, - pensando_nic_pcie_ats=pensando_pcie_ats, - pensando_nic_ports=pensando_ports, - pensando_nic_qos=pensando_qos, - pensando_nic_rdma_statistics=pensando_rdma_statistics, - pensando_nic_version_host_software=pensando_version_host_software, - pensando_nic_version_firmware=pensando_version_firmware, accessible=network_accessible, ) self.result.status = ExecutionStatus.OK diff --git a/nodescraper/plugins/inband/network/networkdata.py b/nodescraper/plugins/inband/network/networkdata.py index 8a0bf99f..20caaeca 100644 --- a/nodescraper/plugins/inband/network/networkdata.py +++ b/nodescraper/plugins/inband/network/networkdata.py @@ -107,195 +107,6 @@ class EthtoolInfo(BaseModel): statistics: Dict[str, str] = Field(default_factory=dict) -class BroadcomNicDevice(BaseModel): - """Broadcom NIC device information from niccli --list_devices""" - - device_num: int # Device number (1, 2, 3, etc.) - model: Optional[str] = None # e.g., "Broadcom BCM57608 1x400G QSFP-DD PCIe Ethernet NIC" - adapter_port: Optional[str] = None # e.g., "Adp#1 Port#1" - interface_name: Optional[str] = None # e.g., "benic1p1" - mac_address: Optional[str] = None # e.g., "8C:84:74:37:C3:70" - pci_address: Optional[str] = None # e.g., "0000:06:00.0" - - -class BroadcomNicQosAppEntry(BaseModel): - """APP TLV entry in Broadcom NIC QoS configuration""" - - priority: Optional[int] = None - sel: Optional[int] = None - dscp: Optional[int] = None - protocol: Optional[str] = None # "UDP or DCCP", etc. 
- port: Optional[int] = None - - -class BroadcomNicQos(BaseModel): - """Broadcom NIC QoS information from niccli --dev X qos --ets --show""" - - device_num: int # Device number this QoS info belongs to - raw_output: str # Raw command output - # ETS Configuration - prio_map: Dict[int, int] = Field( - default_factory=dict - ) # Priority to TC mapping {0: 0, 1: 0, ...} - tc_bandwidth: List[int] = Field( - default_factory=list - ) # TC bandwidth percentages [50, 50, 0, ...] - tsa_map: Dict[int, str] = Field( - default_factory=dict - ) # TC to TSA mapping {0: "ets", 1: "ets", ...} - # PFC Configuration - pfc_enabled: Optional[int] = None # Bitmap of PFC enabled priorities - # APP TLV entries - app_entries: List[BroadcomNicQosAppEntry] = Field(default_factory=list) - # TC Rate Limit - tc_rate_limit: List[int] = Field(default_factory=list) # TC rate limits [100, 100, 100, ...] - - -class PensandoNicCard(BaseModel): - """Pensando NIC card information from nicctl show card""" - - id: str # Card ID (UUID format) - pcie_bdf: str # PCIe Bus:Device.Function (e.g., "0000:06:00.0") - asic: Optional[str] = None # ASIC type (e.g., "salina") - fw_partition: Optional[str] = None # Firmware partition (e.g., "A") - serial_number: Optional[str] = None # Serial number (e.g., "FPL25330294") - - -class PensandoNicDcqcn(BaseModel): - """Pensando NIC DCQCN information from nicctl show dcqcn""" - - nic_id: str # NIC ID (UUID format) - pcie_bdf: str # PCIe Bus:Device.Function (e.g., "0000:06:00.0") - lif_id: Optional[str] = None # Lif ID (UUID format) - roce_device: Optional[str] = None # ROCE device name (e.g., "rocep9s0") - dcqcn_profile_id: Optional[str] = None # DCQCN profile id (e.g., "1") - status: Optional[str] = None # Status (e.g., "Disabled") - - -class PensandoNicEnvironment(BaseModel): - """Pensando NIC environment information from nicctl show environment""" - - nic_id: str # NIC ID (UUID format) - pcie_bdf: str # PCIe Bus:Device.Function (e.g., "0000:06:00.0") - # Power 
measurements in Watts - total_power_drawn: Optional[float] = None # Total power drawn (pin) - core_power: Optional[float] = None # Core power (pout1) - arm_power: Optional[float] = None # ARM power (pout2) - # Temperature measurements in Celsius - local_board_temperature: Optional[float] = None # Local board temperature - die_temperature: Optional[float] = None # Die temperature - # Voltage measurements in millivolts - input_voltage: Optional[float] = None # Input voltage - core_voltage: Optional[float] = None # Core voltage - # Frequency measurements in MHz - core_frequency: Optional[float] = None # Core frequency - cpu_frequency: Optional[float] = None # CPU frequency - p4_stage_frequency: Optional[float] = None # P4 stage frequency - - -class PensandoNicPcieAts(BaseModel): - """Pensando NIC PCIe ATS information from nicctl show pcie ats""" - - nic_id: str # NIC ID (UUID format) - pcie_bdf: str # PCIe Bus:Device.Function (e.g., "0000:06:00.0") - status: str # Status (e.g., "Disabled", "Enabled") - - -class PensandoNicPort(BaseModel): - """Pensando NIC port information from nicctl show port""" - - nic_id: str # NIC ID (UUID format) - pcie_bdf: str # PCIe Bus:Device.Function (e.g., "0000:06:00.0") - port_id: str # Port ID (UUID format) - port_name: str # Port name (e.g., "eth1/1") - # Spec fields - spec_ifindex: Optional[str] = None - spec_type: Optional[str] = None - spec_speed: Optional[str] = None - spec_admin_state: Optional[str] = None - spec_fec_type: Optional[str] = None - spec_pause_type: Optional[str] = None - spec_num_lanes: Optional[int] = None - spec_mtu: Optional[int] = None - spec_tx_pause: Optional[str] = None - spec_rx_pause: Optional[str] = None - spec_auto_negotiation: Optional[str] = None - # Status fields - status_physical_port: Optional[int] = None - status_operational_status: Optional[str] = None - status_link_fsm_state: Optional[str] = None - status_fec_type: Optional[str] = None - status_cable_type: Optional[str] = None - status_num_lanes: 
Optional[int] = None - status_speed: Optional[str] = None - status_auto_negotiation: Optional[str] = None - status_mac_id: Optional[int] = None - status_mac_channel: Optional[int] = None - status_mac_address: Optional[str] = None - status_transceiver_type: Optional[str] = None - status_transceiver_state: Optional[str] = None - status_transceiver_pid: Optional[str] = None - - -class PensandoNicQosScheduling(BaseModel): - """QoS Scheduling entry""" - - priority: int - scheduling_type: Optional[str] = None # e.g., "DWRR" - bandwidth: Optional[int] = None # Bandwidth in percentage - rate_limit: Optional[str] = None # Rate limit (e.g., "N/A" or value in Gbps) - - -class PensandoNicQos(BaseModel): - """Pensando NIC QoS information from nicctl show qos""" - - nic_id: str # NIC ID (UUID format) - pcie_bdf: str # PCIe Bus:Device.Function (e.g., "0000:06:00.0") - port_id: str # Port ID (UUID format) - classification_type: Optional[str] = None # e.g., "DSCP" - dscp_bitmap: Optional[str] = None # DSCP bitmap - dscp_range: Optional[str] = None # DSCP range (e.g., "0-63") - dscp_priority: Optional[int] = None # Priority mapped from DSCP - pfc_priority_bitmap: Optional[str] = None # PFC priority bitmap - pfc_no_drop_priorities: Optional[str] = None # PFC no-drop priorities - scheduling: List[PensandoNicQosScheduling] = Field(default_factory=list) # Scheduling entries - - -class PensandoNicRdmaStatistic(BaseModel): - """RDMA statistic entry""" - - name: str # Statistic name - count: int # Count value - - -class PensandoNicRdmaStatistics(BaseModel): - """Pensando NIC RDMA statistics from nicctl show rdma statistics""" - - nic_id: str # NIC ID (UUID format) - pcie_bdf: str # PCIe Bus:Device.Function (e.g., "0000:06:00.0") - statistics: List[PensandoNicRdmaStatistic] = Field(default_factory=list) # Statistics entries - - -class PensandoNicVersionHostSoftware(BaseModel): - """Pensando NIC host software version from nicctl show version host-software""" - - nicctl: Optional[str] = None 
# nicctl version - ipc_driver: Optional[str] = None # IPC driver version - ionic_driver: Optional[str] = None # ionic driver version - - -class PensandoNicVersionFirmware(BaseModel): - """Pensando NIC firmware version from nicctl show version firmware""" - - nic_id: str # NIC ID (UUID format) - pcie_bdf: str # PCIe Bus:Device.Function (e.g., "0000:06:00.0") - cpld: Optional[str] = None # CPLD version - boot0: Optional[str] = None # Boot0 version - uboot_a: Optional[str] = None # Uboot-A version - firmware_a: Optional[str] = None # Firmware-A version - device_config_a: Optional[str] = None # Device config-A version - - class NetworkDataModel(DataModel): """Complete network configuration data""" @@ -306,17 +117,4 @@ class NetworkDataModel(DataModel): ethtool_info: Dict[str, EthtoolInfo] = Field( default_factory=dict ) # Interface name -> EthtoolInfo mapping - broadcom_nic_devices: List[BroadcomNicDevice] = Field(default_factory=list) - broadcom_nic_qos: Dict[int, BroadcomNicQos] = Field( - default_factory=dict - ) # Device number -> QoS info mapping - pensando_nic_cards: List[PensandoNicCard] = Field(default_factory=list) - pensando_nic_dcqcn: List[PensandoNicDcqcn] = Field(default_factory=list) - pensando_nic_environment: List[PensandoNicEnvironment] = Field(default_factory=list) - pensando_nic_pcie_ats: List[PensandoNicPcieAts] = Field(default_factory=list) - pensando_nic_ports: List[PensandoNicPort] = Field(default_factory=list) - pensando_nic_qos: List[PensandoNicQos] = Field(default_factory=list) - pensando_nic_rdma_statistics: List[PensandoNicRdmaStatistics] = Field(default_factory=list) - pensando_nic_version_host_software: Optional[PensandoNicVersionHostSoftware] = None - pensando_nic_version_firmware: List[PensandoNicVersionFirmware] = Field(default_factory=list) accessible: Optional[bool] = None # Network accessibility check via ping diff --git a/nodescraper/plugins/inband/nic/__init__.py b/nodescraper/plugins/inband/nic/__init__.py new file mode 100644 
index 00000000..187e5b40 --- /dev/null +++ b/nodescraper/plugins/inband/nic/__init__.py @@ -0,0 +1,28 @@ +############################################################################### +# +# MIT License +# +# Copyright (c) 2026 Advanced Micro Devices, Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +# +############################################################################### +from .nic_plugin import NicPlugin + +__all__ = ["NicPlugin"] diff --git a/nodescraper/plugins/inband/nic/analyzer_args.py b/nodescraper/plugins/inband/nic/analyzer_args.py new file mode 100644 index 00000000..65214b76 --- /dev/null +++ b/nodescraper/plugins/inband/nic/analyzer_args.py @@ -0,0 +1,75 @@ +############################################################################### +# +# MIT License +# +# Copyright (c) 2026 Advanced Micro Devices, Inc. 
from typing import Any, Dict, List, Optional

from pydantic import Field

from nodescraper.models import AnalyzerArgs


class NicAnalyzerArgs(AnalyzerArgs):
    """Analyzer args for niccli/nicctl data, with expected_values keyed by canonical command key.

    Every field has a default, so the analyzer can be run with ``NicAnalyzerArgs()``.
    The ``expected_qos_*`` fields are optional overrides: when at least one of them is
    set the analyzer compares each device against the configured values; when none is
    set, ``require_qos_consistent_across_adapters`` decides whether devices are compared
    with each other instead.
    """

    # NOTE(review): not read by NicAnalyzer.analyze_data in this file; presumably consumed
    # elsewhere via command_to_canonical_key -- confirm.
    expected_values: Optional[Dict[str, Dict[str, Any]]] = Field(
        default=None,
        description="Per-command expected checks keyed by canonical key (see command_to_canonical_key).",
    )
    # Matched as a lowercased substring of each device's performance_profile value.
    performance_profile_expected: str = Field(
        default="RoCE",
        description="Expected Broadcom performance_profile value (case-insensitive). Default RoCE.",
    )
    # Matched against the stripped, lowercased support_rdma value of each device.
    support_rdma_disabled_values: List[str] = Field(
        default_factory=lambda: ["0", "false", "disabled", "no", "off"],
        description="Values that indicate RDMA is not supported (case-insensitive).",
    )
    # Matched as a lowercased substring; an empty string disables the check.
    pcie_relaxed_ordering_expected: str = Field(
        default="enabled",
        description="Expected Broadcom pcie_relaxed_ordering value (e.g. 'Relaxed ordering = enabled'); checked case-insensitively. Default enabled.",
    )
    # Expected QoS from niccli getqos (priorities, PFC, ETS) -- applied across all adapters when set.
    # Keys/values are coerced to int -> int by the analyzer before comparison.
    expected_qos_prio_map: Optional[Dict[Any, Any]] = Field(
        default=None,
        description="Expected priority-to-TC map (e.g. {0: 0, 1: 1}; keys may be int or str in config). Checked per device when set.",
    )
    # Compared for equality with the device's pfc_enabled value.
    expected_qos_pfc_enabled: Optional[int] = Field(
        default=None,
        description="Expected PFC enabled value (0/1 or bitmask). Checked per device when set.",
    )
    # Keys are coerced to int and values to str by the analyzer before comparison.
    expected_qos_tsa_map: Optional[Dict[Any, Any]] = Field(
        default=None,
        description="Expected TSA map for ETS (e.g. {0: 'ets', 1: 'strict'}; keys may be int or str in config). Checked per device when set.",
    )
    # Compared for equality with the device's tc_bandwidth list.
    expected_qos_tc_bandwidth: Optional[List[int]] = Field(
        default=None,
        description="Expected TC bandwidth percentages. Checked per device when set.",
    )
    # Only consulted when no expected_qos_* field is set and at least two devices have QoS data.
    require_qos_consistent_across_adapters: bool = Field(
        default=True,
        description="When True and no expected_qos_* are set, require all adapters to have the same prio_map, pfc_enabled, and tsa_map.",
    )
    # Each dict is turned into an ErrorRegex by the analyzer ("regex" may be a pattern string,
    # "event_category"/"event_priority" may be given as strings); None/empty selects the defaults.
    nicctl_log_error_regex: Optional[List[Dict[str, Any]]] = Field(
        default=None, description="Optional list of error patterns for nicctl show card logs."
    )
from typing import List, Optional

from nodescraper.models import CollectorArgs


class NicCollectorArgs(CollectorArgs):
    """Collector arguments for the NIC (niccli/nicctl) plugin.

    All fields are optional; the defaults are ``commands=None`` and sudo enabled for
    both tools.
    """

    # NOTE(review): presumably an explicit list of commands to run instead of the
    # collector's built-in defaults when not None -- confirm against NicCollector.
    commands: Optional[List[str]] = None
    # NOTE(review): presumably whether niccli (Broadcom) commands are run with sudo -- confirm.
    use_sudo_niccli: bool = True
    # NOTE(review): presumably whether nicctl (Pensando) commands are run with sudo -- confirm.
    use_sudo_nicctl: bool = True
import re
from typing import Any, Dict, List, Optional

from nodescraper.base.regexanalyzer import ErrorRegex
from nodescraper.enums import EventCategory, EventPriority, ExecutionStatus
from nodescraper.interfaces import DataAnalyzer
from nodescraper.models import TaskResult

from .analyzer_args import NicAnalyzerArgs
from .nic_data import NicDataModel

# Default regexes for nicctl show card logs (boot-fault, persistent, non-persistent)
DEFAULT_NICCTL_LOG_ERROR_REGEX: List[ErrorRegex] = [
    ErrorRegex(
        regex=re.compile(r"\berror\b", re.IGNORECASE),
        message="nicctl card log: error",
        event_category=EventCategory.NETWORK,
        event_priority=EventPriority.WARNING,
    ),
    ErrorRegex(
        regex=re.compile(r"\bfail(?:ed|ure)?\b", re.IGNORECASE),
        message="nicctl card log: fail/failed/failure",
        event_category=EventCategory.NETWORK,
        event_priority=EventPriority.WARNING,
    ),
    ErrorRegex(
        regex=re.compile(r"\bfault\b", re.IGNORECASE),
        message="nicctl card log: fault",
        event_category=EventCategory.NETWORK,
        event_priority=EventPriority.WARNING,
    ),
    ErrorRegex(
        regex=re.compile(r"\bcritical\b", re.IGNORECASE),
        message="nicctl card log: critical",
        event_category=EventCategory.NETWORK,
        event_priority=EventPriority.WARNING,
    ),
]


def _nicctl_log_error_regex_list(
    args: NicAnalyzerArgs,
) -> List[ErrorRegex]:
    """Return the ErrorRegex list used to scan nicctl card logs.

    Args:
        args: Analyzer args; ``nicctl_log_error_regex`` may hold ErrorRegex instances
            or dicts describing one.

    Returns:
        A copy of DEFAULT_NICCTL_LOG_ERROR_REGEX when no patterns are configured,
        otherwise one ErrorRegex per configured item. Items that are neither an
        ErrorRegex nor a dict are ignored.

    Raises:
        KeyError: If a dict item has no "regex" key.
        ValueError: If "event_category" (str) or "event_priority" (int) is not a
            valid enum value.
    """
    if not args.nicctl_log_error_regex:
        return list(DEFAULT_NICCTL_LOG_ERROR_REGEX)
    out: List[ErrorRegex] = []
    for item in args.nicctl_log_error_regex:
        if isinstance(item, ErrorRegex):
            out.append(item)
        elif isinstance(item, dict):
            # Work on a copy so the caller's configuration dict is not mutated.
            spec = dict(item)
            spec["regex"] = (
                re.compile(spec["regex"]) if isinstance(spec.get("regex"), str) else spec["regex"]
            )
            if "event_category" in spec and isinstance(spec["event_category"], str):
                spec["event_category"] = EventCategory(spec["event_category"])
            if "event_priority" in spec:
                priority = spec["event_priority"]
                if isinstance(priority, str):
                    # Unknown priority names fall back to WARNING rather than failing.
                    spec["event_priority"] = getattr(
                        EventPriority, priority.upper(), EventPriority.WARNING
                    )
                elif isinstance(priority, int):
                    spec["event_priority"] = EventPriority(priority)
            out.append(ErrorRegex(**spec))
    return out


def _normalize_prio_map(d: Optional[Dict[Any, Any]]) -> Optional[Dict[int, int]]:
    """Convert expected_qos_prio_map (config may have str keys) to Dict[int, int].

    Returns None when ``d`` is None; raises ValueError/TypeError if a key or value
    cannot be converted with ``int()``.
    """
    if d is None:
        return None
    return {int(k): int(v) for k, v in d.items()}


def _normalize_tsa_map(d: Optional[Dict[Any, Any]]) -> Optional[Dict[int, str]]:
    """Convert expected_qos_tsa_map (config may have str keys) to Dict[int, str].

    Returns None when ``d`` is None; raises ValueError/TypeError if a key cannot be
    converted with ``int()``.
    """
    if d is None:
        return None
    return {int(k): str(v) for k, v in d.items()}


class NicAnalyzer(DataAnalyzer[NicDataModel, NicAnalyzerArgs]):
    """Analyze niccli/nicctl data.

    Checks Broadcom support_rdma, performance_profile (RoCE), pcie_relaxed_ordering
    (enabled), getqos (expected QoS across adapters), and scans nicctl card logs for
    error patterns.
    """

    DATA_MODEL = NicDataModel

    def analyze_data(
        self, data: NicDataModel, args: Optional[NicAnalyzerArgs] = None
    ) -> TaskResult:
        """Run all NIC checks on the collected data and summarise them in the task result.

        Args:
            data: Collected niccli/nicctl data.
            args: Analyzer configuration; defaults to ``NicAnalyzerArgs()``.

        Returns:
            ``self.result`` with status OK when nothing is flagged (or when there is
            neither support_rdma data nor non-blank nicctl card logs), WARNING otherwise.
        """
        if args is None:
            args = NicAnalyzerArgs()

        has_broadcom = bool(data.broadcom_nic_support_rdma)
        has_nicctl_logs = bool(
            data.nicctl_card_logs and any((c or "").strip() for c in data.nicctl_card_logs.values())
        )
        if not has_broadcom and not has_nicctl_logs:
            self.result.message = "No Broadcom support_rdma or nicctl card log data to check"
            self.result.status = ExecutionStatus.OK
            return self.result

        # The list is evaluated top to bottom, which fixes the order of logged events.
        outcomes = [
            ("support_rdma", self._check_support_rdma(data, args)),
            ("performance_profile", self._check_performance_profile(data, args)),
            ("pcie_relaxed_ordering", self._check_pcie_relaxed_ordering(data, args)),
            ("getqos", self._check_qos(data, args)),
            ("nicctl_card_logs", self._check_nicctl_card_logs(data, args)),
        ]
        parts = [label for label, failed in outcomes if failed]
        if parts:
            self.result.status = ExecutionStatus.WARNING
            self.result.message = f"Broadcom/nic check(s) failed: {' and/or '.join(parts)}"
        else:
            self.result.status = ExecutionStatus.OK
            self.result.message = "Broadcom support_rdma, performance_profile, pcie_relaxed_ordering, getqos, and nicctl card logs checks OK"
        return self.result

    def _check_support_rdma(self, data: NicDataModel, args: NicAnalyzerArgs) -> bool:
        """Log one event per device; return True if any device reports support_rdma disabled."""
        # Normalise the configured values the same way as the observed value so the
        # comparison is case-insensitive on both sides.
        disabled_values = {v.strip().lower() for v in args.support_rdma_disabled_values}
        any_disabled = False
        # "or {}" keeps the logs-only path working when no support_rdma data was collected.
        for device_num, value in sorted((data.broadcom_nic_support_rdma or {}).items()):
            value_lower = (value or "").strip().lower()
            if value_lower in disabled_values:
                any_disabled = True
                self._log_event(
                    category=EventCategory.NETWORK,
                    description=f"Broadcom device {device_num}: support_rdma is disabled or off",
                    data={"device_num": device_num, "support_rdma_output": value},
                    priority=EventPriority.WARNING,
                    console_log=True,
                )
            else:
                self._log_event(
                    category=EventCategory.NETWORK,
                    description=f"Broadcom device {device_num}: support_rdma = {value!r}",
                    data={"device_num": device_num, "support_rdma_output": value},
                    priority=EventPriority.INFO,
                )
        return any_disabled

    def _check_performance_profile(self, data: NicDataModel, args: NicAnalyzerArgs) -> bool:
        """Return True if any device's performance_profile lacks the expected value (substring, case-insensitive)."""
        expected_profile = args.performance_profile_expected.strip()
        expected_profile_lower = expected_profile.lower()
        any_non_roce = False
        if data.broadcom_nic_performance_profile:
            for device_num, value in sorted(data.broadcom_nic_performance_profile.items()):
                value_normalized = (value or "").strip().lower()
                if expected_profile_lower not in value_normalized:
                    any_non_roce = True
                    self._log_event(
                        category=EventCategory.NETWORK,
                        description=f"Broadcom device {device_num}: performance_profile is {value!r} (expected {expected_profile})",
                        data={"device_num": device_num, "performance_profile_output": value},
                        priority=EventPriority.WARNING,
                        console_log=True,
                    )
                else:
                    self._log_event(
                        category=EventCategory.NETWORK,
                        description=f"Broadcom device {device_num}: performance_profile = {expected_profile}",
                        data={"device_num": device_num, "performance_profile_output": value},
                        priority=EventPriority.INFO,
                    )
        return any_non_roce

    def _check_pcie_relaxed_ordering(self, data: NicDataModel, args: NicAnalyzerArgs) -> bool:
        """Return True if any device's pcie_relaxed_ordering lacks the expected value; skipped when the expectation is empty."""
        expected_ro = args.pcie_relaxed_ordering_expected.strip().lower()
        any_relaxed_ordering_bad = False
        if data.broadcom_nic_pcie_relaxed_ordering and expected_ro:
            for device_num, value in sorted(data.broadcom_nic_pcie_relaxed_ordering.items()):
                value_lower = (value or "").strip().lower()
                if expected_ro not in value_lower:
                    any_relaxed_ordering_bad = True
                    self._log_event(
                        category=EventCategory.NETWORK,
                        description=f"Broadcom device {device_num}: pcie_relaxed_ordering does not show {args.pcie_relaxed_ordering_expected!r} (got {value!r})",
                        data={"device_num": device_num, "pcie_relaxed_ordering_output": value},
                        priority=EventPriority.WARNING,
                        console_log=True,
                    )
                else:
                    self._log_event(
                        category=EventCategory.NETWORK,
                        description=f"Broadcom device {device_num}: pcie_relaxed_ordering = {args.pcie_relaxed_ordering_expected}",
                        data={"device_num": device_num, "pcie_relaxed_ordering_output": value},
                        priority=EventPriority.INFO,
                    )
        return any_relaxed_ordering_bad

    def _check_qos(self, data: NicDataModel, args: NicAnalyzerArgs) -> bool:
        """Compare getqos data with the expected QoS, or across adapters when none is configured.

        Returns True if any device mismatches.
        """
        qos_by_device = data.broadcom_nic_qos or {}
        expected_prio = _normalize_prio_map(args.expected_qos_prio_map)
        expected_tsa = _normalize_tsa_map(args.expected_qos_tsa_map)
        any_qos_mismatch = False
        if (
            expected_prio is not None
            or args.expected_qos_pfc_enabled is not None
            or expected_tsa is not None
            or args.expected_qos_tc_bandwidth is not None
        ):
            for device_num, qos in sorted(qos_by_device.items()):
                mismatches = []
                if expected_prio is not None and qos.prio_map != expected_prio:
                    mismatches.append(f"prio_map {qos.prio_map!r} != expected {expected_prio!r}")
                if (
                    args.expected_qos_pfc_enabled is not None
                    and qos.pfc_enabled != args.expected_qos_pfc_enabled
                ):
                    mismatches.append(
                        f"pfc_enabled {qos.pfc_enabled!r} != expected {args.expected_qos_pfc_enabled!r}"
                    )
                if expected_tsa is not None and qos.tsa_map != expected_tsa:
                    mismatches.append(f"tsa_map {qos.tsa_map!r} != expected {expected_tsa!r}")
                if (
                    args.expected_qos_tc_bandwidth is not None
                    and qos.tc_bandwidth != args.expected_qos_tc_bandwidth
                ):
                    mismatches.append(
                        f"tc_bandwidth {qos.tc_bandwidth!r} != expected {args.expected_qos_tc_bandwidth!r}"
                    )
                if mismatches:
                    any_qos_mismatch = True
                    self._log_event(
                        category=EventCategory.NETWORK,
                        description=f"Broadcom device {device_num}: getqos does not match expected QoS: {'; '.join(mismatches)}",
                        data={
                            "device_num": device_num,
                            "qos": qos.model_dump(),
                            "mismatches": mismatches,
                        },
                        priority=EventPriority.WARNING,
                        console_log=True,
                    )
                else:
                    self._log_event(
                        category=EventCategory.NETWORK,
                        description=f"Broadcom device {device_num}: getqos matches expected (priorities, PFC, ETS)",
                        data={"device_num": device_num},
                        priority=EventPriority.INFO,
                    )
        elif args.require_qos_consistent_across_adapters and len(qos_by_device) >= 2:
            # The first collected device (dict insertion order) is the reference for all others.
            first = next(iter(qos_by_device.values()))
            for device_num, qos in sorted(qos_by_device.items()):
                if (
                    qos.prio_map != first.prio_map
                    or qos.pfc_enabled != first.pfc_enabled
                    or qos.tsa_map != first.tsa_map
                ):
                    any_qos_mismatch = True
                    self._log_event(
                        category=EventCategory.NETWORK,
                        description=f"Broadcom device {device_num}: getqos differs from other adapters (priorities, PFC, or ETS not consistent)",
                        data={"device_num": device_num, "qos": qos.model_dump()},
                        priority=EventPriority.WARNING,
                        console_log=True,
                    )
                else:
                    self._log_event(
                        category=EventCategory.NETWORK,
                        description=f"Broadcom device {device_num}: getqos consistent with other adapters",
                        data={"device_num": device_num},
                        priority=EventPriority.INFO,
                    )
        return any_qos_mismatch

    def _check_nicctl_card_logs(self, data: NicDataModel, args: NicAnalyzerArgs) -> bool:
        """Scan nicctl card logs with the configured error regexes; return True on any match."""
        any_nicctl_log_errors = False
        if data.nicctl_card_logs:
            regex_list = _nicctl_log_error_regex_list(args)
            for log_type, content in data.nicctl_card_logs.items():
                if not (content or "").strip():
                    continue
                for err_regex in regex_list:
                    for match in err_regex.regex.finditer(content):
                        matched_text = match.group(0).strip() or match.group(0)
                        # Keep event payloads bounded for very long matches.
                        if len(matched_text) > 500:
                            matched_text = matched_text[:497] + "..."
                        any_nicctl_log_errors = True
                        self._log_event(
                            category=err_regex.event_category,
                            description=f"nicctl card log ({log_type}): {err_regex.message} — {matched_text!r}",
                            data={
                                "log_type": log_type,
                                "message": err_regex.message,
                                "match_content": matched_text,
                            },
                            priority=err_regex.event_priority,
                            console_log=True,
                        )
        return any_nicctl_log_errors
+# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+# +############################################################################### +import json +import re +from typing import Any, Dict, List, Optional, Tuple + +from nodescraper.base import InBandDataCollector +from nodescraper.connection.inband import TextFileArtifact +from nodescraper.enums import EventCategory, EventPriority, ExecutionStatus +from nodescraper.models import TaskResult +from nodescraper.utils import ( + command_result_event_data, + has_command_error_output, +) + +from .collector_args import NicCollectorArgs +from .nic_data import ( + NicCliDevice, + NicCliQos, + NicCliQosAppEntry, + NicCommandResult, + NicCtlCard, + NicCtlCardShow, + NicCtlDcqcn, + NicCtlEnvironment, + NicCtlLif, + NicCtlPort, + NicCtlQos, + NicCtlRdma, + NicCtlVersion, + NicDataModel, + PensandoNicCard, + PensandoNicDcqcn, + PensandoNicEnvironment, + PensandoNicLif, + PensandoNicPcieAts, + PensandoNicPort, + PensandoNicQos, + PensandoNicQosScheduling, + PensandoNicRdmaStatistic, + PensandoNicRdmaStatistics, + PensandoNicVersionFirmware, + PensandoNicVersionHostSoftware, + command_to_canonical_key, +) + +# Default commands: niccli (Broadcom) and nicctl (Pensando). Use {device_num} and {card_id} placeholders. 
# --- niccli (Broadcom) -------------------------------------------------------
# Command used to detect the installed niccli version.
NICCLI_VERSION_CMD = "niccli --version"
# Highest niccli version that still uses the legacy syntax (-dev / -getoption /
# getqos). Versions greater than this use --dev / --getoption / qos --ets --show.
NICCLI_VERSION_LEGACY_MAX = 233
NICCLI_LIST_CMD = "niccli --list"
NICCLI_LIST_DEVICES_CMD = "niccli --list_devices"  # new (> v233)
NICCLI_LIST_DEVICES_CMD_LEGACY = "niccli --listdev"  # legacy (<= v233)
# Discovery commands, in the order they are listed for each syntax generation.
NICCLI_DISCOVERY_CMDS_LEGACY = [
    NICCLI_LIST_DEVICES_CMD_LEGACY,
    NICCLI_LIST_CMD,
]
NICCLI_DISCOVERY_CMDS_NEW = [
    NICCLI_LIST_DEVICES_CMD,
    NICCLI_LIST_CMD,
]
# Default discovery list for backward compatibility = legacy.
NICCLI_DISCOVERY_CMDS = NICCLI_DISCOVERY_CMDS_LEGACY
# All discovery command variants (for canonical key).
NICCLI_DISCOVERY_CMDS_ALL = frozenset(
    [NICCLI_LIST_DEVICES_CMD_LEGACY, NICCLI_LIST_DEVICES_CMD, NICCLI_LIST_CMD]
)

# Legacy (<= v233): single-dash options and getqos.
NICCLI_SUPPORT_RDMA_CMD_TEMPLATE_LEGACY = (
    "niccli -dev {device_num} nvm -getoption support_rdma -scope 0"
)
NICCLI_PERFORMANCE_PROFILE_CMD_TEMPLATE_LEGACY = (
    "niccli -dev {device_num} nvm -getoption performance_profile"
)
NICCLI_PCIE_RELAXED_ORDERING_CMD_TEMPLATE_LEGACY = (
    "niccli -dev {device_num} nvm -getoption pcie_relaxed_ordering"
)
NICCLI_QOS_CMD_TEMPLATE_LEGACY = "niccli -dev {device_num} getqos"
# Order matters: consumers unpack this list positionally as
# (support_rdma, performance_profile, pcie_relaxed_ordering, qos).
NICCLI_PER_DEVICE_TEMPLATES_LEGACY = [
    NICCLI_SUPPORT_RDMA_CMD_TEMPLATE_LEGACY,
    NICCLI_PERFORMANCE_PROFILE_CMD_TEMPLATE_LEGACY,
    NICCLI_PCIE_RELAXED_ORDERING_CMD_TEMPLATE_LEGACY,
    NICCLI_QOS_CMD_TEMPLATE_LEGACY,
]

# New (> v233): double-dash options and qos --ets --show.
NICCLI_SUPPORT_RDMA_CMD_TEMPLATE_NEW = "niccli --dev {device_num} nvm --getoption support_rdma"
NICCLI_PERFORMANCE_PROFILE_CMD_TEMPLATE_NEW = (
    "niccli --dev {device_num} nvm --getoption performance_profile"
)
NICCLI_PCIE_RELAXED_ORDERING_CMD_TEMPLATE_NEW = (
    "niccli --dev {device_num} nvm --getoption pcie_relaxed_ordering"
)
NICCLI_QOS_CMD_TEMPLATE_NEW = "niccli --dev {device_num} qos --ets --show"
# Same positional order as NICCLI_PER_DEVICE_TEMPLATES_LEGACY.
NICCLI_PER_DEVICE_TEMPLATES_NEW = [
    NICCLI_SUPPORT_RDMA_CMD_TEMPLATE_NEW,
    NICCLI_PERFORMANCE_PROFILE_CMD_TEMPLATE_NEW,
    NICCLI_PCIE_RELAXED_ORDERING_CMD_TEMPLATE_NEW,
    NICCLI_QOS_CMD_TEMPLATE_NEW,
]

# Backward compatibility: default to legacy templates (used by _default_commands
# and any code that imports these).
NICCLI_SUPPORT_RDMA_CMD_TEMPLATE = NICCLI_SUPPORT_RDMA_CMD_TEMPLATE_LEGACY
NICCLI_PERFORMANCE_PROFILE_CMD_TEMPLATE = NICCLI_PERFORMANCE_PROFILE_CMD_TEMPLATE_LEGACY
NICCLI_PCIE_RELAXED_ORDERING_CMD_TEMPLATE = NICCLI_PCIE_RELAXED_ORDERING_CMD_TEMPLATE_LEGACY
# Unsuffixed QoS alias, so all four per-device templates have a default name.
NICCLI_QOS_CMD_TEMPLATE = NICCLI_QOS_CMD_TEMPLATE_LEGACY
NICCLI_PER_DEVICE_TEMPLATES = NICCLI_PER_DEVICE_TEMPLATES_LEGACY

# --- nicctl (Pensando) -------------------------------------------------------
# Text-format command for card discovery and pensando_nic_cards (no --json).
NICCTL_CARD_TEXT_CMD = "nicctl show card"
# Commands run once per host (no {card_id} placeholder).
NICCTL_GLOBAL_COMMANDS = [
    "nicctl --version",
    "nicctl show card flash partition --json",
    "nicctl show card interrupts --json",
    "nicctl show card logs --non-persistent",
    "nicctl show card logs --boot-fault",
    "nicctl show card logs --persistent",
    "nicctl show card profile --json",
    "nicctl show card time --json",
    "nicctl show card statistics packet-buffer summary --json",
    "nicctl show lif statistics --json",
    "nicctl show lif internal queue-to-ud-pinning",
    "nicctl show pipeline internal anomalies",
    "nicctl show pipeline internal rsq-ring",
    "nicctl show pipeline internal statistics memory",
    "nicctl show port fsm",
    "nicctl show port transceiver --json",
    "nicctl show port statistics --json",
    "nicctl show port internal mac",
    "nicctl show qos headroom --json",
    "nicctl show rdma queue --json",
    "nicctl show rdma queue-pair --detail --json",
    "nicctl show version firmware",
]
# Commands expanded once per discovered card via the {card_id} placeholder.
NICCTL_PER_CARD_TEMPLATES = [
    "nicctl show dcqcn --card {card_id} --json",
    "nicctl show card hardware-config --card {card_id}",
]

# Legacy text-format commands for Pensando (no --json); parsed by _parse_nicctl_* into pensando_nic_*.
+NICCTL_LEGACY_TEXT_COMMANDS = [ + "nicctl show card", + "nicctl show dcqcn", + "nicctl show environment", + "nicctl show lif", + "nicctl show pcie ats", + "nicctl show port", + "nicctl show qos", + "nicctl show rdma statistics", + "nicctl show version host-software", +] + +# Max lengths for fields included in the serialized datamodel (keeps nicclidatamodel.json small). +MAX_COMMAND_LENGTH_IN_DATAMODEL = 256 +MAX_STDERR_LENGTH_IN_DATAMODEL = 512 + + +def _parse_niccli_version(stdout: str) -> Optional[int]: + """Parse niccli version number from 'niccli --version' output. + Handles formats like 'niccli v233', 'v233', 'version 233', '233'. + Returns None if version cannot be parsed. + """ + if not stdout or not stdout.strip(): + return None + # Match v233, v 233, version 233, niccli 233, etc. + match = re.search(r"v?\s*(\d+)|version\s+(\d+)|\b(\d{2,})\b", stdout.strip(), re.I) + if match: + for g in match.groups(): + if g is not None: + return int(g) + return None + + +def _get_niccli_per_device_templates(version: Optional[int]) -> List[str]: + """Return the per-device command templates for the given niccli version. + For version > NICCLI_VERSION_LEGACY_MAX (233) use new syntax (--dev, --getoption, qos --ets --show). + Otherwise use legacy syntax (-dev, -getoption, getqos). If version is None, default to legacy. + """ + if version is not None and version > NICCLI_VERSION_LEGACY_MAX: + return NICCLI_PER_DEVICE_TEMPLATES_NEW.copy() + return NICCLI_PER_DEVICE_TEMPLATES_LEGACY.copy() + + +def _get_niccli_discovery_commands(version: Optional[int]) -> List[str]: + """Return the discovery commands for the given niccli version. + Legacy (<= v233) uses --listdev; new (> v233) uses --list_devices. If version is None, default to legacy. 
+ """ + if version is not None and version > NICCLI_VERSION_LEGACY_MAX: + return NICCLI_DISCOVERY_CMDS_NEW.copy() + return NICCLI_DISCOVERY_CMDS_LEGACY.copy() + + +# Commands whose output is very long; store only as file artifacts, not in data model. +def _is_artifact_only_command(cmd: str) -> bool: + c = cmd.strip() + if c.startswith("nicctl show card logs "): + return True + if "nicctl show card hardware-config --card " in c: + return True + if c == "nicctl show port fsm": + return True + if c.startswith("nicctl show pipeline internal "): + return True + if c == "nicctl show rdma queue-pair --detail --json": + return True + if c == "nicctl show lif internal queue-to-ud-pinning": + return True + if c == "nicctl show port internal mac": + return True + return False + + +def _merged_canonical_key(cmd: str) -> str: + """Return a single canonical key for commands that collect the same data.""" + if cmd in NICCLI_DISCOVERY_CMDS_ALL: + return "niccli_discovery" + return command_to_canonical_key(cmd) + + +def _default_commands() -> List[str]: + """Return the default flat list of command templates (with placeholders).""" + out: List[str] = [NICCLI_LIST_CMD] + for t in NICCLI_PER_DEVICE_TEMPLATES: + out.append(t) + for c in NICCTL_GLOBAL_COMMANDS: + out.append(c) + for t in NICCTL_PER_CARD_TEMPLATES: + out.append(t) + return out + + +def _parse_niccli_qos_app_entries(stdout: str) -> List[NicCliQosAppEntry]: + """Parse APP# blocks from niccli qos output into NicCliQosAppEntry list.""" + entries: List[NicCliQosAppEntry] = [] + current: Optional[NicCliQosAppEntry] = None + for line in stdout.splitlines(): + line = line.strip() + if re.match(r"APP#\d+", line, re.I): + if current is not None: + entries.append(current) + current = NicCliQosAppEntry() + continue + if current is None or ":" not in line: + continue + key, _, val = line.partition(":") + key, val = key.strip().lower(), val.strip() + if "priority" in key: + try: + current.priority = int(val) + except ValueError: + 
pass + elif key == "sel": + try: + current.sel = int(val) + except ValueError: + pass + elif key == "dscp": + try: + current.dscp = int(val) + except ValueError: + pass + elif key == "port": + try: + current.port = int(val) + except ValueError: + pass + elif ( + key in ("tcp", "udp", "dccp") + or "protocol" in key + or "udp" in key + or "tcp" in key + or "dccp" in key + ): + if val and not val.isdigit(): + current.protocol = val + else: + current.protocol = { + "udp or dccp": "UDP or DCCP", + "tcp": "TCP", + "udp": "UDP", + "dccp": "DCCP", + }.get(key, key.replace("_", " ").title() if val.isdigit() else val) + if val: + try: + current.port = int(val) + except ValueError: + pass + if current is not None: + entries.append(current) + return entries + + +def _parse_niccli_device_numbers(stdout: str) -> List[int]: + """Parse device numbers from niccli --list or --list_devices output. + Looks for lines like '1) Model' or '1 )' to extract device index. + """ + device_nums: List[int] = [] + for line in stdout.splitlines(): + line = line.strip() + if not line: + continue + match = re.match(r"^(\d+)\s*\)", line) + if match: + try: + device_nums.append(int(match.group(1))) + except ValueError: + continue + return sorted(set(device_nums)) + + +def _parse_nicctl_card_ids(stdout: str) -> List[str]: + """Parse card IDs from nicctl show card --json output. + Expects JSON: either a list of objects with 'id'/'card_id' or an object with a list. 
+ """ + try: + data = json.loads(stdout) + except json.JSONDecodeError: + return [] + ids: List[str] = [] + if isinstance(data, list): + for item in data: + if isinstance(item, dict): + cid = item.get("id") or item.get("card_id") or item.get("CardId") + if cid is not None: + ids.append(str(cid)) + elif isinstance(data, dict): + cards = data.get("cards") or data.get("Cards") or data.get("card") or data.get("data") + if isinstance(cards, list): + for item in cards: + if isinstance(item, dict): + cid = item.get("id") or item.get("card_id") or item.get("CardId") + if cid is not None: + ids.append(str(cid)) + cid = data.get("id") or data.get("card_id") + if cid is not None and str(cid) not in ids: + ids.append(str(cid)) + return ids + + +def _card_list_items(data: Any) -> List[Any]: + """Return list of card item dicts from parsed nicctl show card --json.""" + if data is None: + return [] + if isinstance(data, list): + return [x for x in data if isinstance(x, dict)] + if isinstance(data, dict): + cards = data.get("cards") or data.get("Cards") or data.get("card") or data.get("data") + if isinstance(cards, list): + return [x for x in cards if isinstance(x, dict)] + return [] + + +def _find_card_info(card_list: List[Any], card_id: str) -> Optional[Any]: + """Return the card item dict whose id/card_id matches card_id.""" + for item in card_list: + cid = item.get("id") or item.get("card_id") or item.get("CardId") + if cid is not None and str(cid) == str(card_id): + return item + return None + + +def _build_structured( + results: Dict[str, NicCommandResult], + parsed: Dict[str, Any], + card_ids: List[str], + card_list_override: Optional[List[Dict[str, Any]]] = None, +) -> Tuple[ + Optional[NicCtlCardShow], + List[NicCtlCard], + Optional[NicCtlPort], + Optional[NicCtlLif], + Optional[NicCtlQos], + Optional[NicCtlRdma], + Optional[NicCtlDcqcn], + Optional[NicCtlEnvironment], + Optional[NicCtlVersion], +]: + """Build structured domain objects from results and parsed dicts.""" + + 
def _p(cmd: str) -> Any: + return parsed.get(cmd) + + def _r(cmd: str) -> Optional[NicCommandResult]: + return results.get(cmd) + + def _stdout(cmd: str) -> str: + r = _r(cmd) + return (r.stdout or "") if r else "" + + card_list = ( + card_list_override + if card_list_override is not None + else _card_list_items(_p("nicctl show card --json")) + ) + cards: List[NicCtlCard] = [] + for cid in card_ids: + info = _find_card_info(card_list, cid) + hw_cmd = f"nicctl show card hardware-config --card {cid}" + dcqcn_cmd = f"nicctl show dcqcn --card {cid} --json" + cards.append( + NicCtlCard( + card_id=cid, + info=info, + hardware_config=_stdout(hw_cmd) or None, + dcqcn=_p(dcqcn_cmd), + ) + ) + + card_show = NicCtlCardShow( + flash_partition=_p("nicctl show card flash partition --json"), + interrupts=_p("nicctl show card interrupts --json"), + logs_non_persistent=_stdout("nicctl show card logs --non-persistent") or None, + logs_boot_fault=_stdout("nicctl show card logs --boot-fault") or None, + logs_persistent=_stdout("nicctl show card logs --persistent") or None, + profile=_p("nicctl show card profile --json"), + time=_p("nicctl show card time --json"), + statistics_packet_buffer_summary=_p( + "nicctl show card statistics packet-buffer summary --json" + ), + ) + + port = NicCtlPort( + port=_p("nicctl show port"), + port_fsm=_stdout("nicctl show port fsm") or None, + port_transceiver=_p("nicctl show port transceiver --json"), + port_statistics=_p("nicctl show port statistics --json"), + port_internal_mac=_stdout("nicctl show port internal mac") or None, + ) + lif = NicCtlLif( + lif=_p("nicctl show lif"), + lif_statistics=_p("nicctl show lif statistics --json"), + lif_internal_queue_to_ud_pinning=_stdout("nicctl show lif internal queue-to-ud-pinning") + or None, + ) + qos = NicCtlQos( + qos=_p("nicctl show qos"), + qos_headroom=_p("nicctl show qos headroom --json"), + ) + rdma = NicCtlRdma( + rdma_queue=_p("nicctl show rdma queue --json"), + rdma_queue_pair_detail=_p("nicctl 
show rdma queue-pair --detail --json"), + rdma_statistics=_p("nicctl show rdma statistics"), + ) + dcqcn = NicCtlDcqcn(dcqcn_global=_p("nicctl show dcqcn")) + environment = NicCtlEnvironment(environment=_p("nicctl show environment")) + version = NicCtlVersion( + version=_stdout("nicctl --version") or None, + version_firmware=_stdout("nicctl show version firmware") or None, + ) + return card_show, cards, port, lif, qos, rdma, dcqcn, environment, version + + +class NicCollector(InBandDataCollector[NicDataModel, NicCollectorArgs]): + """Collect raw output from niccli (Broadcom) and nicctl (Pensando) commands.""" + + DATA_MODEL = NicDataModel + + def collect_data( + self, + args: Optional[NicCollectorArgs] = None, + ) -> Tuple[TaskResult, Optional[NicDataModel]]: + """Run niccli/nicctl commands and store stdout/stderr/exit_code per command.""" + use_sudo_niccli = args.use_sudo_niccli if args else True + use_sudo_nicctl = args.use_sudo_nicctl if args else True + custom_commands = args.commands if args and args.commands else None + + results: dict[str, NicCommandResult] = {} + + # Detect niccli version to choose command set (legacy <= v233 vs new > v233) + niccli_version: Optional[int] = None + res_version = self._run_sut_cmd(NICCLI_VERSION_CMD, sudo=use_sudo_niccli) + if res_version.exit_code == 0 and res_version.stdout: + niccli_version = _parse_niccli_version(res_version.stdout) + results[NICCLI_VERSION_CMD] = NicCommandResult( + command=NICCLI_VERSION_CMD, + stdout=res_version.stdout or "", + stderr=res_version.stderr or "", + exit_code=res_version.exit_code, + ) + + # Discovery: device numbers from niccli + device_nums: List[int] = [] + discovery_cmds = _get_niccli_discovery_commands(niccli_version) + for list_cmd in discovery_cmds: + res = self._run_sut_cmd(list_cmd, sudo=use_sudo_niccli) + results[list_cmd] = NicCommandResult( + command=list_cmd, + stdout=res.stdout or "", + stderr=res.stderr or "", + exit_code=res.exit_code, + ) + if res.exit_code == 0 and 
res.stdout: + device_nums = _parse_niccli_device_numbers(res.stdout) + if device_nums: + break + + # Discovery: card IDs from nicctl show card (text); same output used for pensando_nic_cards + card_ids: List[str] = [] + card_list_from_text: List[Dict[str, Any]] = [] + res_card = self._run_sut_cmd(NICCTL_CARD_TEXT_CMD, sudo=use_sudo_nicctl) + results[NICCTL_CARD_TEXT_CMD] = NicCommandResult( + command=NICCTL_CARD_TEXT_CMD, + stdout=res_card.stdout or "", + stderr=res_card.stderr or "", + exit_code=res_card.exit_code, + ) + if res_card.exit_code == 0 and res_card.stdout: + legacy_cards = self._parse_nicctl_card(res_card.stdout) + card_ids = [c.id for c in legacy_cards] + card_list_from_text = [c.model_dump() for c in legacy_cards] + + # Build full command list (expand placeholders) + if custom_commands is not None: + commands_to_run: List[str] = [] + for tpl in custom_commands: + if "{device_num}" in tpl: + for d in device_nums: + commands_to_run.append(tpl.format(device_num=d)) + elif "{card_id}" in tpl: + for c in card_ids: + commands_to_run.append(tpl.format(card_id=c)) + else: + commands_to_run.append(tpl) + else: + commands_to_run = [] + # niccli list already stored + per_device_templates = _get_niccli_per_device_templates(niccli_version) + for tpl in per_device_templates: + for d in device_nums: + commands_to_run.append(tpl.format(device_num=d)) + # nicctl global (card discovery already done via NICCTL_CARD_TEXT_CMD) + for c in NICCTL_GLOBAL_COMMANDS: + commands_to_run.append(c) + for tpl in NICCTL_PER_CARD_TEMPLATES: + for cid in card_ids: + commands_to_run.append(tpl.format(card_id=cid)) + for cmd in NICCTL_LEGACY_TEXT_COMMANDS: + commands_to_run.append(cmd) + + # Run each command and store (artifact-only commands are not added to results / data model). 
+ for cmd in commands_to_run: + if cmd in results: + continue + is_niccli = cmd.strip().startswith("niccli") + sudo = use_sudo_niccli if is_niccli else use_sudo_nicctl + res = self._run_sut_cmd(cmd, sudo=sudo) + has_error_output = has_command_error_output(res.stderr or "", res.stdout or "") + if _is_artifact_only_command(cmd): + if res.exit_code != 0: + self._log_event( + category=EventCategory.NETWORK, + description=f"niccli/nicctl command failed: {cmd}", + data=command_result_event_data(res), + priority=EventPriority.WARNING, + ) + elif has_error_output: + self._log_event( + category=EventCategory.NETWORK, + description=f"niccli/nicctl reported errors (exit 0): {cmd}", + data=command_result_event_data(res), + priority=EventPriority.WARNING, + ) + continue + results[cmd] = NicCommandResult( + command=cmd, + stdout=res.stdout or "", + stderr=res.stderr or "", + exit_code=res.exit_code, + ) + if res.exit_code != 0: + self._log_event( + category=EventCategory.NETWORK, + description=f"niccli/nicctl command failed: {cmd}", + data=command_result_event_data(res), + priority=EventPriority.WARNING, + ) + elif has_error_output: + self._log_event( + category=EventCategory.NETWORK, + description=f"niccli/nicctl reported errors (exit 0): {cmd}", + data=command_result_event_data(res), + priority=EventPriority.WARNING, + ) + + # Parse JSON for building structured domain objects (artifact-only commands have no stdout, so not in parsed). + parsed: Dict[str, Any] = {} + for cmd, r in results.items(): + if r.exit_code != 0 or not (r.stdout or "").strip(): + continue + try: + parsed[cmd] = json.loads(r.stdout.strip()) + except (ValueError, TypeError): + pass + + # Build structured domain objects from JSON/raw output (card_show/cards from text when present). 
+ ( + card_show, + cards, + port, + lif, + qos, + rdma, + dcqcn, + environment, + version, + ) = _build_structured( + results, parsed, card_ids, card_list_override=card_list_from_text or None + ) + + # card_show and cards (can be large) go to TextFileArtifacts; excluded from datamodel. + if card_show is not None: + self.result.artifacts.append( + TextFileArtifact( + filename="niccli_card_show.json", + contents=card_show.model_dump_json(indent=2), + ) + ) + if cards: + self.result.artifacts.append( + TextFileArtifact( + filename="niccli_cards.json", + contents=json.dumps([c.model_dump(mode="json") for c in cards], indent=2), + ) + ) + + # Serialized nicclidatamodel.json: no stdout in results, truncated command/stderr (keeps file small). + # Command output lives on disk from _run_sut_cmd; model keeps only command identity and status. + def _truncate(s: str, max_len: int) -> str: + if not s or len(s) <= max_len: + return s or "" + return s[: max_len - 3] + "..." + + results_for_model = { + cmd: NicCommandResult( + command=_truncate(r.command, MAX_COMMAND_LENGTH_IN_DATAMODEL), + stdout="", + stderr=_truncate(r.stderr or "", MAX_STDERR_LENGTH_IN_DATAMODEL), + exit_code=r.exit_code, + ) + for cmd, r in results.items() + } + + # Legacy text parsers: populate broadcom_nic_* and pensando_nic_* for the datamodel. 
+ ( + broadcom_devices, + broadcom_qos_data, + broadcom_support_rdma, + broadcom_performance_profile, + broadcom_pcie_relaxed_ordering, + ) = self._collect_broadcom_nic_structured(results, niccli_version=niccli_version) + ( + pensando_cards, + pensando_dcqcn, + pensando_environment, + pensando_lif, + pensando_pcie_ats, + pensando_ports, + pensando_qos, + pensando_rdma_statistics, + pensando_version_host_software, + pensando_version_firmware, + ) = self._collect_pensando_nic_structured(results) + + if not results or all(r.exit_code != 0 for r in results.values()): + self.result.status = ExecutionStatus.EXECUTION_FAILURE + self.result.message = "All niccli/nicctl commands failed or no commands were run" + else: + self.result.status = ExecutionStatus.OK + self.result.message = f"Collected {len(results)} niccli/nicctl command results" + + nicctl_card_logs = None + if card_show is not None: + nicctl_card_logs = { + "boot_fault": (card_show.logs_boot_fault or ""), + "persistent": (card_show.logs_persistent or ""), + "non_persistent": (card_show.logs_non_persistent or ""), + } + + return self.result, NicDataModel( + results=results_for_model, + card_show=None, + cards=[], + nicctl_card_logs=nicctl_card_logs, + port=port, + lif=lif, + qos=qos, + rdma=rdma, + dcqcn=dcqcn, + environment=environment, + version=version, + broadcom_nic_devices=broadcom_devices, + broadcom_nic_qos=broadcom_qos_data, + broadcom_nic_support_rdma=broadcom_support_rdma, + broadcom_nic_performance_profile=broadcom_performance_profile, + broadcom_nic_pcie_relaxed_ordering=broadcom_pcie_relaxed_ordering, + pensando_nic_cards=pensando_cards, + pensando_nic_dcqcn=pensando_dcqcn, + pensando_nic_environment=pensando_environment, + pensando_nic_lif=pensando_lif, + pensando_nic_pcie_ats=pensando_pcie_ats, + pensando_nic_ports=pensando_ports, + pensando_nic_qos=pensando_qos, + pensando_nic_rdma_statistics=pensando_rdma_statistics, + pensando_nic_version_host_software=pensando_version_host_software, + 
pensando_nic_version_firmware=pensando_version_firmware, + ) + + def _collect_broadcom_nic_structured( + self, + results: Dict[str, NicCommandResult], + niccli_version: Optional[int] = None, + ) -> Tuple[ + List[NicCliDevice], Dict[int, NicCliQos], Dict[int, str], Dict[int, str], Dict[int, str] + ]: + """Build niccli (Broadcom) structured data from results using legacy text parsers.""" + devices: List[NicCliDevice] = [] + qos_data: Dict[int, NicCliQos] = {} + support_rdma: Dict[int, str] = {} + performance_profile: Dict[int, str] = {} + pcie_relaxed_ordering: Dict[int, str] = {} + list_stdout: Optional[str] = None + discovery_cmds = _get_niccli_discovery_commands(niccli_version) + for list_cmd in discovery_cmds: + r = results.get(list_cmd) + if r and r.exit_code == 0 and (r.stdout or "").strip(): + list_stdout = r.stdout + break + if not list_stdout: + return devices, qos_data, support_rdma, performance_profile, pcie_relaxed_ordering + devices = self._parse_niccli_listdev(list_stdout) + templates = _get_niccli_per_device_templates(niccli_version) + support_rdma_tpl, perf_tpl, pcie_ro_tpl, qos_tpl = ( + templates[0], + templates[1], + templates[2], + templates[3], + ) + for device in devices: + qos_cmd = qos_tpl.format(device_num=device.device_num) + r = results.get(qos_cmd) + if r and r.exit_code == 0 and (r.stdout or "").strip(): + qos_data[device.device_num] = self._parse_niccli_qos( + device.device_num, r.stdout or "" + ) + support_rdma_cmd = support_rdma_tpl.format(device_num=device.device_num) + r_sr = results.get(support_rdma_cmd) + if r_sr and r_sr.exit_code == 0 and (r_sr.stdout or "").strip(): + support_rdma[device.device_num] = (r_sr.stdout or "").strip() + perf_cmd = perf_tpl.format(device_num=device.device_num) + r_pp = results.get(perf_cmd) + if r_pp and r_pp.exit_code == 0 and (r_pp.stdout or "").strip(): + performance_profile[device.device_num] = (r_pp.stdout or "").strip() + ro_cmd = pcie_ro_tpl.format(device_num=device.device_num) + r_ro = 
results.get(ro_cmd) + if r_ro and r_ro.exit_code == 0 and (r_ro.stdout or "").strip(): + pcie_relaxed_ordering[device.device_num] = (r_ro.stdout or "").strip() + return devices, qos_data, support_rdma, performance_profile, pcie_relaxed_ordering + + def _collect_pensando_nic_structured(self, results: Dict[str, NicCommandResult]) -> Tuple[ + List[PensandoNicCard], + List[PensandoNicDcqcn], + List[PensandoNicEnvironment], + List[PensandoNicLif], + List[PensandoNicPcieAts], + List[PensandoNicPort], + List[PensandoNicQos], + List[PensandoNicRdmaStatistics], + Optional[PensandoNicVersionHostSoftware], + List[PensandoNicVersionFirmware], + ]: + """Build Pensando NIC structured data from results using legacy text parsers.""" + + def _stdout(cmd: str) -> str: + r = results.get(cmd) + return (r.stdout or "").strip() if r and r.exit_code == 0 else "" + + cards = self._parse_nicctl_card(_stdout("nicctl show card")) + dcqcn_entries = self._parse_nicctl_dcqcn(_stdout("nicctl show dcqcn")) + environment_entries = self._parse_nicctl_environment(_stdout("nicctl show environment")) + lif_entries = self._parse_nicctl_lif(_stdout("nicctl show lif")) + pcie_ats_entries = self._parse_nicctl_pcie_ats(_stdout("nicctl show pcie ats")) + port_entries = self._parse_nicctl_port(_stdout("nicctl show port")) + qos_entries = self._parse_nicctl_qos(_stdout("nicctl show qos")) + rdma_statistics_entries = self._parse_nicctl_rdma_statistics( + _stdout("nicctl show rdma statistics") + ) + version_host_software = self._parse_nicctl_version_host_software( + _stdout("nicctl show version host-software") + ) + version_firmware_entries = self._parse_nicctl_version_firmware( + _stdout("nicctl show version firmware") + ) + + return ( + cards, + dcqcn_entries, + environment_entries, + lif_entries, + pcie_ats_entries, + port_entries, + qos_entries, + rdma_statistics_entries, + version_host_software, + version_firmware_entries, + ) + + def _parse_niccli_listdev(self, stdout: str) -> List[NicCliDevice]: + 
"""Parse niccli --list_devices output into NicCliDevice list.""" + devices: List[NicCliDevice] = [] + current_num: Optional[int] = None + model = adapter_port = interface_name = mac_address = pci_address = None + for line in stdout.splitlines(): + line = line.strip() + if not line: + continue + num_match = re.match(r"^(\d+)\s*\)\s*(.*)", line) + if num_match: + if current_num is not None and model is not None: + devices.append( + NicCliDevice( + device_num=current_num, + model=model.strip() or None, + adapter_port=adapter_port, + interface_name=interface_name, + mac_address=mac_address, + pci_address=pci_address, + ) + ) + current_num = int(num_match.group(1)) + rest = num_match.group(2).strip() + if rest and "(" in rest and ")" in rest: + model = re.sub(r"\s*\([^)]+\)\s*$", "", rest).strip() or None + port_match = re.search(r"\(([^)]+)\)\s*$", rest) + adapter_port = port_match.group(1).strip() if port_match else None + else: + model = rest or None + adapter_port = None + interface_name = mac_address = pci_address = None + continue + if current_num is None: + continue + if ":" in line: + key, _, val = line.partition(":") + key, val = key.strip().lower(), val.strip() + if "interface" in key or "device interface" in key: + interface_name = val or None + elif "mac" in key: + mac_address = val or None + elif "pci" in key: + pci_address = val or None + if current_num is not None and ( + model is not None or interface_name or mac_address or pci_address + ): + devices.append( + NicCliDevice( + device_num=current_num, + model=model, + adapter_port=adapter_port, + interface_name=interface_name, + mac_address=mac_address, + pci_address=pci_address, + ) + ) + return devices + + def _parse_niccli_qos(self, device_num: int, stdout: str) -> NicCliQos: + """Parse niccli -dev X qos --ets --show output.""" + prio_map: Dict[int, int] = {} + tc_bandwidth: List[int] = [] + tsa_map: Dict[int, str] = {} + pfc_enabled: Optional[int] = None + app_entries: List[NicCliQosAppEntry] = [] + 
tc_rate_limit: List[int] = [] + for line in stdout.splitlines(): + line = line.strip() + if "PRIO_MAP:" in line or "PRIO_MAP" in line: + for part in re.findall(r"(\d+):(\d+)", line): + prio_map[int(part[0])] = int(part[1]) + if "TC Bandwidth:" in line: + tc_bandwidth = [int(x) for x in re.findall(r"(\d+)%", line)] + if "TSA_MAP:" in line: + for i, m in enumerate(re.findall(r"\d+:(\w+)", line)): + tsa_map[i] = m + if "PFC enabled:" in line: + m = re.search(r"PFC enabled:\s*(\d+)", line, re.I) + if m: + pfc_enabled = int(m.group(1)) + if "APP#" in line and not app_entries: + app_entries = _parse_niccli_qos_app_entries(stdout) + if "TC Rate Limit:" in line: + tc_rate_limit = [int(x) for x in re.findall(r"(\d+)%", line)] + return NicCliQos( + device_num=device_num, + raw_output=stdout, + prio_map=prio_map, + tc_bandwidth=tc_bandwidth, + tsa_map=tsa_map, + pfc_enabled=pfc_enabled, + app_entries=app_entries, + tc_rate_limit=tc_rate_limit, + ) + + def _parse_nicctl_card(self, stdout: str) -> List[PensandoNicCard]: + """Parse nicctl show card (text table) into PensandoNicCard list.""" + cards: List[PensandoNicCard] = [] + for line in stdout.splitlines(): + line = line.strip() + if not line or line.startswith("-") or "PCIe BDF" in line or "Id " in line: + continue + parts = line.split() + if ( + len(parts) >= 2 + and re.match(r"^[0-9a-f-]{36}$", parts[0]) + and re.match(r"^[0-9a-f:.]{12,}$", parts[1]) + ): + card_id, pcie_bdf = parts[0], parts[1] + asic = parts[2] if len(parts) > 2 and not parts[2].startswith("0") else None + fw_partition = parts[3] if len(parts) > 3 and parts[3] in ("A", "B") else None + serial_number = parts[4] if len(parts) > 4 else None + cards.append( + PensandoNicCard( + id=card_id, + pcie_bdf=pcie_bdf, + asic=asic, + fw_partition=fw_partition, + serial_number=serial_number, + ) + ) + return cards + + def _parse_nicctl_dcqcn(self, stdout: str) -> List[PensandoNicDcqcn]: + """Parse nicctl show dcqcn (text) into PensandoNicDcqcn list.""" + entries: 
List[PensandoNicDcqcn] = [] + nic_id = pcie_bdf = None + lif_id = roce_device = dcqcn_profile_id = status = None + for line in stdout.splitlines(): + if "NIC :" in line or "NIC:" in line: + m = re.search(r"NIC\s*:\s*([^\s(]+)\s*\(([^)]+)\)", line) + if m: + nic_id, pcie_bdf = m.group(1).strip(), m.group(2).strip() + lif_id = roce_device = dcqcn_profile_id = status = None + if nic_id and "Lif id" in line and ":" in line: + lif_id = line.split(":", 1)[1].strip() + if nic_id and "ROCE device" in line and ":" in line: + roce_device = line.split(":", 1)[1].strip() + if nic_id and "DCQCN profile id" in line and ":" in line: + dcqcn_profile_id = line.split(":", 1)[1].strip() + if nic_id and "Status" in line and ":" in line: + status = line.split(":", 1)[1].strip() + entries.append( + PensandoNicDcqcn( + nic_id=nic_id, + pcie_bdf=pcie_bdf or "", + lif_id=lif_id, + roce_device=roce_device, + dcqcn_profile_id=dcqcn_profile_id, + status=status, + ) + ) + return entries + + def _parse_nicctl_environment(self, stdout: str) -> List[PensandoNicEnvironment]: + """Parse nicctl show environment (text) into PensandoNicEnvironment list.""" + entries: List[PensandoNicEnvironment] = [] + nic_id = pcie_bdf = None + data: Dict[str, Optional[float]] = {} + for line in stdout.splitlines(): + if "NIC :" in line or "NIC:" in line: + m = re.search(r"NIC\s*:\s*([^\s(]+)\s*\(([^)]+)\)", line) + if m: + if nic_id and pcie_bdf: + entries.append( + PensandoNicEnvironment( + nic_id=nic_id, + pcie_bdf=pcie_bdf, + total_power_drawn=data.get("total_power_drawn"), + core_power=data.get("core_power"), + arm_power=data.get("arm_power"), + local_board_temperature=data.get("local_board_temperature"), + die_temperature=data.get("die_temperature"), + input_voltage=data.get("input_voltage"), + core_voltage=data.get("core_voltage"), + core_frequency=data.get("core_frequency"), + cpu_frequency=data.get("cpu_frequency"), + p4_stage_frequency=data.get("p4_stage_frequency"), + ) + ) + nic_id, pcie_bdf = 
m.group(1).strip(), m.group(2).strip() + data = {} + if nic_id and ":" in line: + key, _, val = line.partition(":") + key, val = key.strip().lower(), val.strip() + try: + v = float(val) + if "total power" in key or "pin" in key: + data["total_power_drawn"] = v + elif "core power" in key or "pout1" in key: + data["core_power"] = v + elif "arm power" in key or "pout2" in key: + data["arm_power"] = v + elif "local board" in key: + data["local_board_temperature"] = v + elif "die temperature" in key: + data["die_temperature"] = v + elif "input voltage" in key: + data["input_voltage"] = v + elif "core voltage" in key: + data["core_voltage"] = v + elif "core frequency" in key: + data["core_frequency"] = v + elif "cpu frequency" in key: + data["cpu_frequency"] = v + elif "p4 stage" in key: + data["p4_stage_frequency"] = v + except ValueError: + pass + if nic_id and pcie_bdf: + entries.append( + PensandoNicEnvironment( + nic_id=nic_id, + pcie_bdf=pcie_bdf, + total_power_drawn=data.get("total_power_drawn"), + core_power=data.get("core_power"), + arm_power=data.get("arm_power"), + local_board_temperature=data.get("local_board_temperature"), + die_temperature=data.get("die_temperature"), + input_voltage=data.get("input_voltage"), + core_voltage=data.get("core_voltage"), + core_frequency=data.get("core_frequency"), + cpu_frequency=data.get("cpu_frequency"), + p4_stage_frequency=data.get("p4_stage_frequency"), + ) + ) + return entries + + def _parse_nicctl_lif(self, stdout: str) -> List[PensandoNicLif]: + """Parse nicctl show lif (text) into PensandoNicLif list.""" + entries: List[PensandoNicLif] = [] + nic_id = pcie_bdf = None + for line in stdout.splitlines(): + if "NIC " in line and ":" in line and "(" in line: + m = re.search(r"NIC\s*:\s*([^\s(]+)\s*\(([^)]+)\)", line) + if m: + nic_id, pcie_bdf = m.group(1).strip(), m.group(2).strip() + if "LIF :" in line or "Lif :" in line or "Lif:" in line: + rest = line.split(":", 1)[-1].strip() + lif_match = 
re.match(r"([0-9a-f-]{36})\s*\(([^)]*)\)", rest) + if lif_match and nic_id: + lif_id, lif_name = lif_match.group(1), lif_match.group(2).strip() + entries.append( + PensandoNicLif( + nic_id=nic_id, + pcie_bdf=pcie_bdf or "", + lif_id=lif_id, + lif_name=lif_name or None, + ) + ) + elif re.match(r"^[0-9a-f-]{36}$", rest.strip()) and nic_id: + entries.append( + PensandoNicLif( + nic_id=nic_id, + pcie_bdf=pcie_bdf or "", + lif_id=rest.strip(), + lif_name=None, + ) + ) + return entries + + def _parse_nicctl_pcie_ats(self, stdout: str) -> List[PensandoNicPcieAts]: + """Parse nicctl show pcie ats (text) into PensandoNicPcieAts list.""" + entries: List[PensandoNicPcieAts] = [] + for line in stdout.splitlines(): + m = re.search(r"NIC\s*:\s*([^\s(]+)\s*\(([^)]+)\)\s*:\s*(\w+)", line) + if m: + entries.append( + PensandoNicPcieAts( + nic_id=m.group(1).strip(), + pcie_bdf=m.group(2).strip(), + status=m.group(3).strip(), + ) + ) + return entries + + def _parse_nicctl_port(self, stdout: str) -> List[PensandoNicPort]: + """Parse nicctl show port (text) into PensandoNicPort list.""" + entries: List[PensandoNicPort] = [] + nic_id = pcie_bdf = None + port_id = port_name = None + spec_speed = status_operational_status = None + for line in stdout.splitlines(): + if "NIC " in line and ":" in line and "(" in line: + m = re.search(r"NIC\s*:\s*([^\s(]+)\s*\(([^)]+)\)", line) + if m: + nic_id, pcie_bdf = m.group(1).strip(), m.group(2).strip() + port_id = port_name = None + if "Port :" in line or "Port:" in line: + if nic_id and port_id is not None: + entries.append( + PensandoNicPort( + nic_id=nic_id, + pcie_bdf=pcie_bdf or "", + port_id=port_id, + port_name=port_name or port_id, + spec_speed=spec_speed, + status_operational_status=status_operational_status, + ) + ) + rest = line.split(":", 1)[-1].strip() + port_match = re.match(r"([0-9a-f-]{36})\s*\(([^)]+)\)", rest) + if port_match: + port_id, port_name = port_match.group(1), port_match.group(2) + else: + port_id = rest if 
re.match(r"^[0-9a-f-]{36}$", rest.strip()) else None + port_name = "" + spec_speed = status_operational_status = None + if ( + nic_id + and "speed" in line + and ":" in line + and "Spec" not in line + and "Advertised" not in line + ): + spec_speed = line.split(":", 1)[1].strip() + if nic_id and "Operational status" in line and ":" in line: + status_operational_status = line.split(":", 1)[1].strip() + if nic_id and port_id is not None: + entries.append( + PensandoNicPort( + nic_id=nic_id, + pcie_bdf=pcie_bdf or "", + port_id=port_id, + port_name=port_name or port_id, + spec_speed=spec_speed, + status_operational_status=status_operational_status, + ) + ) + return entries + + def _parse_nicctl_qos(self, stdout: str) -> List[PensandoNicQos]: + """Parse nicctl show qos (text) into PensandoNicQos list.""" + entries: List[PensandoNicQos] = [] + nic_id = pcie_bdf = port_id = None + classification_type = None + scheduling: List[PensandoNicQosScheduling] = [] + for line in stdout.splitlines(): + if "NIC " in line and "(" in line: + m = re.search(r"NIC\s*:\s*([^\s(]+)\s*\(([^)]+)\)", line) + if m: + nic_id, pcie_bdf = m.group(1).strip(), m.group(2).strip() + port_id = None + scheduling = [] + if "Port :" in line: + port_match = re.search(r"([0-9a-f-]{36})", line) + port_id = port_match.group(1) if port_match else "" + if "Classification type" in line and ":" in line: + classification_type = line.split(":", 1)[1].strip() + if "DWRR" in line or "Scheduling" in line: + parts = line.split() + if len(parts) >= 3: + try: + prio = int(parts[0]) + sched_type = parts[1] if len(parts) > 1 else None + bw = int(parts[2]) if parts[2].isdigit() else None + rate = parts[3] if len(parts) > 3 else None + scheduling.append( + PensandoNicQosScheduling( + priority=prio, + scheduling_type=sched_type, + bandwidth=bw, + rate_limit=rate, + ) + ) + except (ValueError, IndexError): + pass + if nic_id and port_id and (classification_type is not None or scheduling): + entries.append( + PensandoNicQos( + 
nic_id=nic_id, + pcie_bdf=pcie_bdf or "", + port_id=port_id, + classification_type=classification_type, + scheduling=scheduling, + ) + ) + return entries + + def _parse_nicctl_rdma_statistics(self, stdout: str) -> List[PensandoNicRdmaStatistics]: + """Parse nicctl show rdma statistics (text) into PensandoNicRdmaStatistics list.""" + entries: List[PensandoNicRdmaStatistics] = [] + nic_id = pcie_bdf = None + stats: List[PensandoNicRdmaStatistic] = [] + for line in stdout.splitlines(): + if "NIC :" in line or "NIC:" in line: + m = re.search(r"NIC\s*:\s*([^\s(]+)\s*\(([^)]+)\)", line) + if m: + if nic_id and stats: + entries.append( + PensandoNicRdmaStatistics( + nic_id=nic_id, + pcie_bdf=pcie_bdf or "", + statistics=stats, + ) + ) + nic_id, pcie_bdf = m.group(1).strip(), m.group(2).strip() + stats = [] + if nic_id and ":" in line and "NIC" not in line: + key, _, val = line.partition(":") + name, val = key.strip(), val.strip() + try: + count = int(val) + stats.append(PensandoNicRdmaStatistic(name=name, count=count)) + except ValueError: + pass + if nic_id and stats: + entries.append( + PensandoNicRdmaStatistics( + nic_id=nic_id, + pcie_bdf=pcie_bdf or "", + statistics=stats, + ) + ) + return entries + + def _parse_nicctl_version_host_software( + self, stdout: str + ) -> Optional[PensandoNicVersionHostSoftware]: + """Parse nicctl show version host-software (text).""" + if not stdout or not stdout.strip(): + return None + version = ipc_driver = ionic_driver = None + for line in stdout.splitlines(): + if ":" in line: + key, _, val = line.partition(":") + key, val = key.strip().lower(), val.strip() + if "nicctl" in key: + version = val + elif "ipc" in key: + ipc_driver = val + elif "ionic" in key: + ionic_driver = val + return PensandoNicVersionHostSoftware( + version=version, + ipc_driver=ipc_driver, + ionic_driver=ionic_driver, + ) + + def _parse_nicctl_version_firmware(self, stdout: str) -> List[PensandoNicVersionFirmware]: + """Parse nicctl show version firmware (text) 
into PensandoNicVersionFirmware list.""" + entries: List[PensandoNicVersionFirmware] = [] + nic_id = pcie_bdf = None + cpld = boot0 = uboot_a = firmware_a = device_config_a = None + for line in stdout.splitlines(): + if "NIC :" in line or "NIC:" in line: + m = re.search(r"NIC\s*:\s*([^\s(]+)\s*\(([^)]+)\)", line) + if m: + if nic_id: + entries.append( + PensandoNicVersionFirmware( + nic_id=nic_id, + pcie_bdf=pcie_bdf or "", + cpld=cpld, + boot0=boot0, + uboot_a=uboot_a, + firmware_a=firmware_a, + device_config_a=device_config_a, + ) + ) + nic_id, pcie_bdf = m.group(1).strip(), m.group(2).strip() + cpld = boot0 = uboot_a = firmware_a = device_config_a = None + if nic_id and ":" in line: + key, _, val = line.partition(":") + key, val = key.strip().lower(), val.strip() + if "cpld" in key: + cpld = val + elif "boot0" in key: + boot0 = val + elif "uboot-a" in key or "uboot_a" in key: + uboot_a = val + elif "firmware-a" in key or "firmware_a" in key: + firmware_a = val + elif "device config" in key or "device_config" in key: + device_config_a = val + if nic_id: + entries.append( + PensandoNicVersionFirmware( + nic_id=nic_id, + pcie_bdf=pcie_bdf or "", + cpld=cpld, + boot0=boot0, + uboot_a=uboot_a, + firmware_a=firmware_a, + device_config_a=device_config_a, + ) + ) + return entries diff --git a/nodescraper/plugins/inband/nic/nic_data.py b/nodescraper/plugins/inband/nic/nic_data.py new file mode 100644 index 00000000..40e16216 --- /dev/null +++ b/nodescraper/plugins/inband/nic/nic_data.py @@ -0,0 +1,411 @@ +############################################################################### +# +# MIT License +# +# Copyright (c) 2026 Advanced Micro Devices, Inc. 
+# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+# +############################################################################### +import re +from typing import Any, Dict, List, Optional + +from pydantic import BaseModel, Field + +from nodescraper.models import DataModel + + +class NicCtlCardShow(BaseModel): + """Outputs from global 'nicctl show card *' commands (flash, interrupts, logs, profile, time, statistics).""" + + flash_partition: Optional[Any] = None + interrupts: Optional[Any] = None + logs_non_persistent: Optional[str] = None + logs_boot_fault: Optional[str] = None + logs_persistent: Optional[str] = None + profile: Optional[Any] = None + time: Optional[Any] = None + statistics_packet_buffer_summary: Optional[Any] = None + + +class NicCtlCard(BaseModel): + """Per-card data: identity from 'nicctl show card' plus per-card commands (hardware-config, dcqcn).""" + + card_id: str + info: Optional[Any] = Field( + default=None, description="Card entry from nicctl show card --json list." + ) + hardware_config: Optional[str] = Field( + default=None, description="Raw stdout from nicctl show card hardware-config --card {id}." + ) + dcqcn: Optional[Any] = Field( + default=None, description="Parsed JSON from nicctl show dcqcn --card {id} --json." + ) + + +class NicCtlPort(BaseModel): + """Outputs from 'nicctl show port *' commands.""" + + port: Optional[Any] = Field(default=None, description="Parsed from nicctl show port --json.") + port_fsm: Optional[str] = Field( + default=None, description="Raw stdout from nicctl show port fsm." + ) + port_transceiver: Optional[Any] = Field( + default=None, description="Parsed from nicctl show port transceiver --json." + ) + port_statistics: Optional[Any] = Field( + default=None, description="Parsed from nicctl show port statistics --json." + ) + port_internal_mac: Optional[str] = Field( + default=None, description="Raw stdout from nicctl show port internal mac." 
+ ) + + +class NicCtlLif(BaseModel): + """Outputs from 'nicctl show lif *' commands.""" + + lif: Optional[Any] = Field(default=None, description="Parsed from nicctl show lif --json.") + lif_statistics: Optional[Any] = Field( + default=None, description="Parsed from nicctl show lif statistics --json." + ) + lif_internal_queue_to_ud_pinning: Optional[str] = Field( + default=None, + description="Raw stdout from nicctl show lif internal queue-to-ud-pinning.", + ) + + +class NicCtlQos(BaseModel): + """Outputs from 'nicctl show qos *' commands.""" + + qos: Optional[Any] = Field(default=None, description="Parsed from nicctl show qos --json.") + qos_headroom: Optional[Any] = Field( + default=None, description="Parsed from nicctl show qos headroom --json." + ) + + +class NicCtlRdma(BaseModel): + """Outputs from 'nicctl show rdma *' commands.""" + + rdma_queue: Optional[Any] = Field( + default=None, description="Parsed from nicctl show rdma queue --json." + ) + rdma_queue_pair_detail: Optional[Any] = Field( + default=None, + description="Parsed from nicctl show rdma queue-pair --detail --json.", + ) + rdma_statistics: Optional[Any] = Field( + default=None, description="Parsed from nicctl show rdma statistics --json." + ) + + +class NicCtlDcqcn(BaseModel): + """Global DCQCN output; per-card DCQCN is in NicCtlCard.dcqcn.""" + + dcqcn_global: Optional[Any] = Field( + default=None, description="Parsed from nicctl show dcqcn --json." + ) + + +class NicCtlEnvironment(BaseModel): + """Output from 'nicctl show environment'.""" + + environment: Optional[Any] = None + + +class NicCtlVersion(BaseModel): + """Version outputs from nicctl.""" + + version: Optional[str] = Field(default=None, description="Raw stdout from nicctl --version.") + version_firmware: Optional[str] = Field( + default=None, description="Raw stdout from nicctl show version firmware." 
+ ) + + +class NicCliDevice(BaseModel): + """NIC device from niccli --list_devices (Broadcom).""" + + device_num: int + model: Optional[str] = None + adapter_port: Optional[str] = None + interface_name: Optional[str] = None + mac_address: Optional[str] = None + pci_address: Optional[str] = None + + +class NicCliQosAppEntry(BaseModel): + """APP TLV entry in niccli QoS output (Broadcom).""" + + priority: Optional[int] = None + sel: Optional[int] = None + dscp: Optional[int] = None + protocol: Optional[str] = None + port: Optional[int] = None + + +class NicCliQos(BaseModel): + """NIC QoS from niccli -dev X getqos / qos --ets --show (Broadcom).""" + + device_num: int + raw_output: str + prio_map: Dict[int, int] = Field(default_factory=dict) + tc_bandwidth: List[int] = Field(default_factory=list) + tsa_map: Dict[int, str] = Field(default_factory=dict) + pfc_enabled: Optional[int] = None + app_entries: List[NicCliQosAppEntry] = Field(default_factory=list) + tc_rate_limit: List[int] = Field(default_factory=list) + + +class PensandoNicCard(BaseModel): + """Pensando NIC card from nicctl show card (text).""" + + id: str + pcie_bdf: str + asic: Optional[str] = None + fw_partition: Optional[str] = None + serial_number: Optional[str] = None + + +class PensandoNicDcqcn(BaseModel): + """Pensando NIC DCQCN from nicctl show dcqcn (text).""" + + nic_id: str + pcie_bdf: str + lif_id: Optional[str] = None + roce_device: Optional[str] = None + dcqcn_profile_id: Optional[str] = None + status: Optional[str] = None + + +class PensandoNicEnvironment(BaseModel): + """Pensando NIC environment from nicctl show environment (text).""" + + nic_id: str + pcie_bdf: str + total_power_drawn: Optional[float] = None + core_power: Optional[float] = None + arm_power: Optional[float] = None + local_board_temperature: Optional[float] = None + die_temperature: Optional[float] = None + input_voltage: Optional[float] = None + core_voltage: Optional[float] = None + core_frequency: Optional[float] = None + 
cpu_frequency: Optional[float] = None + p4_stage_frequency: Optional[float] = None + + +class PensandoNicPcieAts(BaseModel): + """Pensando NIC PCIe ATS from nicctl show pcie ats (text).""" + + nic_id: str + pcie_bdf: str + status: str + + +class PensandoNicLif(BaseModel): + """Pensando NIC LIF from nicctl show lif (text).""" + + nic_id: str + pcie_bdf: str + lif_id: str + lif_name: Optional[str] = None + + +class PensandoNicPort(BaseModel): + """Pensando NIC port from nicctl show port (text).""" + + nic_id: str + pcie_bdf: str + port_id: str + port_name: str + spec_ifindex: Optional[str] = None + spec_type: Optional[str] = None + spec_speed: Optional[str] = None + spec_admin_state: Optional[str] = None + spec_fec_type: Optional[str] = None + spec_pause_type: Optional[str] = None + spec_num_lanes: Optional[int] = None + spec_mtu: Optional[int] = None + spec_tx_pause: Optional[str] = None + spec_rx_pause: Optional[str] = None + spec_auto_negotiation: Optional[str] = None + status_physical_port: Optional[int] = None + status_operational_status: Optional[str] = None + status_link_fsm_state: Optional[str] = None + status_fec_type: Optional[str] = None + status_cable_type: Optional[str] = None + status_num_lanes: Optional[int] = None + status_speed: Optional[str] = None + status_auto_negotiation: Optional[str] = None + status_mac_id: Optional[int] = None + status_mac_channel: Optional[int] = None + status_mac_address: Optional[str] = None + status_transceiver_type: Optional[str] = None + status_transceiver_state: Optional[str] = None + status_transceiver_pid: Optional[str] = None + + +class PensandoNicQosScheduling(BaseModel): + """QoS Scheduling entry.""" + + priority: int + scheduling_type: Optional[str] = None + bandwidth: Optional[int] = None + rate_limit: Optional[str] = None + + +class PensandoNicQos(BaseModel): + """Pensando NIC QoS from nicctl show qos (text).""" + + nic_id: str + pcie_bdf: str + port_id: str + classification_type: Optional[str] = None + 
dscp_bitmap: Optional[str] = None + dscp_range: Optional[str] = None + dscp_priority: Optional[int] = None + pfc_priority_bitmap: Optional[str] = None + pfc_no_drop_priorities: Optional[str] = None + scheduling: List[PensandoNicQosScheduling] = Field(default_factory=list) + + +class PensandoNicRdmaStatistic(BaseModel): + """RDMA statistic entry.""" + + name: str + count: int + + +class PensandoNicRdmaStatistics(BaseModel): + """Pensando NIC RDMA statistics from nicctl show rdma statistics (text).""" + + nic_id: str + pcie_bdf: str + statistics: List[PensandoNicRdmaStatistic] = Field(default_factory=list) + + +class PensandoNicVersionHostSoftware(BaseModel): + """Pensando NIC host software version from nicctl show version host-software.""" + + version: Optional[str] = None + ipc_driver: Optional[str] = None + ionic_driver: Optional[str] = None + + +class PensandoNicVersionFirmware(BaseModel): + """Pensando NIC firmware version from nicctl show version firmware (text).""" + + nic_id: str + pcie_bdf: str + cpld: Optional[str] = None + boot0: Optional[str] = None + uboot_a: Optional[str] = None + firmware_a: Optional[str] = None + device_config_a: Optional[str] = None + + +def command_to_canonical_key(command: str) -> str: + """Turn a full command string into a stable key. + + E.g. 'nicctl show card --json' -> 'nicctl_show_card_json', + 'nicctl show dcqcn --card 0 --json' -> 'nicctl_show_dcqcn_card_0_json'. 
+ """ + s = command.strip().lower() + s = re.sub(r"\s+", "_", s) + s = re.sub(r"--+", "_", s) + s = s.strip("_") + s = re.sub(r"_+", "_", s) + return s or "unknown" + + +class NicCommandResult(BaseModel): + """Result of a single niccli/nicctl command run.""" + + command: str + stdout: str = "" + stderr: str = "" + exit_code: int = 0 + + @property + def succeeded(self) -> bool: + """True if the command exited with code 0.""" + return self.exit_code == 0 + + +class NicDataModel(DataModel): + """Collected output of niccli (Broadcom) and nicctl (Pensando) commands.""" + + results: Dict[str, NicCommandResult] = Field(default_factory=dict) + + # Structured by domain (parsed from command output in collector) + card_show: Optional[NicCtlCardShow] = Field( + default=None, description="Global nicctl show card * outputs." + ) + cards: List[NicCtlCard] = Field( + default_factory=list, description="Per-card data (card list + hardware-config, dcqcn)." + ) + port: Optional[NicCtlPort] = None + lif: Optional[NicCtlLif] = None + qos: Optional[NicCtlQos] = None + rdma: Optional[NicCtlRdma] = None + dcqcn: Optional[NicCtlDcqcn] = None + environment: Optional[NicCtlEnvironment] = None + version: Optional[NicCtlVersion] = None + + broadcom_nic_devices: List[NicCliDevice] = Field(default_factory=list) + broadcom_nic_qos: Dict[int, NicCliQos] = Field(default_factory=dict) + broadcom_nic_support_rdma: Dict[int, str] = Field( + default_factory=dict, + description="Per-device output of 'niccli -dev X nvm -getoption support_rdma -scope 0' (device_num -> raw stdout).", + ) + broadcom_nic_performance_profile: Dict[int, str] = Field( + default_factory=dict, + description="Per-device output of 'niccli -dev X nvm -getoption performance_profile' (device_num -> raw stdout).", + ) + broadcom_nic_pcie_relaxed_ordering: Dict[int, str] = Field( + default_factory=dict, + description="Per-device output of 'niccli -dev X nvm -getoption pcie_relaxed_ordering' (device_num -> raw stdout).", + ) + 
pensando_nic_cards: List[PensandoNicCard] = Field(default_factory=list) + pensando_nic_dcqcn: List[PensandoNicDcqcn] = Field(default_factory=list) + pensando_nic_environment: List[PensandoNicEnvironment] = Field(default_factory=list) + pensando_nic_lif: List[PensandoNicLif] = Field(default_factory=list) + pensando_nic_pcie_ats: List[PensandoNicPcieAts] = Field(default_factory=list) + pensando_nic_ports: List[PensandoNicPort] = Field(default_factory=list) + pensando_nic_qos: List[PensandoNicQos] = Field(default_factory=list) + pensando_nic_rdma_statistics: List[PensandoNicRdmaStatistics] = Field(default_factory=list) + pensando_nic_version_host_software: Optional[PensandoNicVersionHostSoftware] = None + pensando_nic_version_firmware: List[PensandoNicVersionFirmware] = Field(default_factory=list) + + # Raw nicctl card log output for regex-based error detection + nicctl_card_logs: Optional[Dict[str, str]] = Field( + default=None, + description="Log text from 'nicctl show card logs --boot-fault', --persistent, --non-persistent (keys: boot_fault, persistent, non_persistent).", + ) + + def command_succeeded(self, command: str) -> bool: + """Return True if the command ran and exited with code 0.""" + r = self.results.get(command) + return r is not None and r.succeeded + + def get_card(self, card_id: str) -> Optional[NicCtlCard]: + """Return the per-card data for the given card id.""" + for c in self.cards: + if c.card_id == card_id: + return c + return None diff --git a/nodescraper/plugins/inband/nic/nic_plugin.py b/nodescraper/plugins/inband/nic/nic_plugin.py new file mode 100644 index 00000000..b26ac77b --- /dev/null +++ b/nodescraper/plugins/inband/nic/nic_plugin.py @@ -0,0 +1,46 @@ +############################################################################### +# +# MIT License +# +# Copyright (c) 2026 Advanced Micro Devices, Inc. 
+# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +# +############################################################################### +from nodescraper.base import InBandDataPlugin + +from .analyzer_args import NicAnalyzerArgs +from .collector_args import NicCollectorArgs +from .nic_analyzer import NicAnalyzer +from .nic_collector import NicCollector +from .nic_data import NicDataModel + + +class NicPlugin(InBandDataPlugin[NicDataModel, NicCollectorArgs, NicAnalyzerArgs]): + """Plugin for collecting niccli (Broadcom) and nicctl (Pensando) command output. + + Data is parsed into structured fields (card_show, cards, port, lif, qos, etc.). + The analyzer checks Broadcom support_rdma (niccli -dev x nvm -getoption support_rdma -scope 0). 
+ """ + + DATA_MODEL = NicDataModel + COLLECTOR = NicCollector + COLLECTOR_ARGS = NicCollectorArgs + ANALYZER = NicAnalyzer + ANALYZER_ARGS = NicAnalyzerArgs diff --git a/nodescraper/utils.py b/nodescraper/utils.py index 96dd093a..3b9edf34 100644 --- a/nodescraper/utils.py +++ b/nodescraper/utils.py @@ -28,7 +28,18 @@ import re import traceback from enum import Enum -from typing import Any, List, Optional, Set, Type, TypeVar, Union, get_args, get_origin +from typing import ( + Any, + Dict, + List, + Optional, + Set, + Type, + TypeVar, + Union, + get_args, + get_origin, +) T = TypeVar("T") @@ -90,6 +101,28 @@ def str_or_none(val: object) -> Optional[str]: return s if s else None +MAX_STDERR_STDOUT_LENGTH_IN_EVENT = 4096 + + +def has_command_error_output(stderr: str, stdout: str) -> bool: + """True if the tool reported anything to stderr (errors are typically written to stderr).""" + return bool((stderr or "").strip()) + + +def command_result_event_data( + res: Any, + max_length: int = MAX_STDERR_STDOUT_LENGTH_IN_EVENT, +) -> Dict[str, Any]: + """Build event data dict from a command result (stderr and optionally stdout).""" + stderr = (getattr(res, "stderr", None) or "")[:max_length] + exit_code = getattr(res, "exit_code", None) + data: Dict[str, Any] = {"exit_code": exit_code, "stderr": stderr} + stdout = getattr(res, "stdout", None) or "" + if stdout and (exit_code != 0 or (stderr or "").strip()): + data["stdout"] = stdout[:max_length] + return data + + def convert_to_bytes(value: str, si=False) -> int: """ Convert human-readable memory sizes (like GB, MB) to bytes. 
diff --git a/test/functional/fixtures/nic_plugin_config_full_analyzer_args.json b/test/functional/fixtures/nic_plugin_config_full_analyzer_args.json new file mode 100644 index 00000000..f1cc1668 --- /dev/null +++ b/test/functional/fixtures/nic_plugin_config_full_analyzer_args.json @@ -0,0 +1,37 @@ +{ + "name": "NicPlugin config with all analyzer_args", + "desc": "NicPlugin check.", + "global_args": {}, + "plugins": { + "NicPlugin": { + "collection_args": {}, + "analysis_args": { + "expected_values": { + "niccli_list": {"require_success": true}, + "niccli_list_devices": {"require_success": true} + }, + "performance_profile_expected": "RoCE", + "support_rdma_disabled_values": ["0", "false", "disabled", "no", "off"], + "pcie_relaxed_ordering_expected": "enabled", + "expected_qos_prio_map": { + "0": 0, + "1": 1, + "2": 0, + "3": 1, + "4": 0, + "5": 1, + "6": 0, + "7": 1 + }, + "expected_qos_pfc_enabled": 255, + "expected_qos_tsa_map": { + "0": "ets", + "1": "ets" + }, + "expected_qos_tc_bandwidth": [50, 50], + "require_qos_consistent_across_adapters": true + } + } + }, + "result_collators": {} +} diff --git a/test/functional/fixtures/niccli_plugin_config.json b/test/functional/fixtures/niccli_plugin_config.json new file mode 100644 index 00000000..b1ea0f8a --- /dev/null +++ b/test/functional/fixtures/niccli_plugin_config.json @@ -0,0 +1,19 @@ +{ + "name": "NicPlugin config", + "desc": "Config for NicPlugin", + "global_args": {}, + "plugins": { + "NicPlugin": { + "collection_args": {}, + "analysis_args": { + "performance_profile_expected": "RoCE", + "support_rdma_disabled_values": ["0", "false", "disabled", "no", "off"], + "pcie_relaxed_ordering_expected": "enabled", + "expected_qos_pfc_enabled": 255, + "expected_qos_tc_bandwidth": [50, 50], + "require_qos_consistent_across_adapters": true + } + } + }, + "result_collators": {} +} diff --git a/test/functional/test_nic_plugin.py b/test/functional/test_nic_plugin.py new file mode 100644 index 00000000..ed9d28f2 --- 
/dev/null +++ b/test/functional/test_nic_plugin.py @@ -0,0 +1,137 @@ +############################################################################### +# +# MIT License +# +# Copyright (c) 2026 Advanced Micro Devices, Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+# +############################################################################### + +from pathlib import Path + +import pytest + + +@pytest.fixture +def fixtures_dir(): + """Return path to fixtures directory.""" + return Path(__file__).parent / "fixtures" + + +@pytest.fixture +def nic_plugin_config_full_analyzer_args(fixtures_dir): + """Return path to NicPlugin config with all analyzer_args populated.""" + return fixtures_dir / "nic_plugin_config_full_analyzer_args.json" + + +@pytest.fixture +def nic_plugin_config_minimal(fixtures_dir): + """Return path to minimal NicPlugin config (niccli_plugin_config.json).""" + return fixtures_dir / "niccli_plugin_config.json" + + +def test_nic_plugin_with_full_analyzer_args_config( + run_cli_command, nic_plugin_config_full_analyzer_args, tmp_path +): + """Test NicPlugin using config with all analyzer_args (performance_profile, getqos, etc.).""" + assert ( + nic_plugin_config_full_analyzer_args.exists() + ), f"Config file not found: {nic_plugin_config_full_analyzer_args}" + + log_path = str(tmp_path / "logs_nic_full_args") + result = run_cli_command( + [ + "--log-path", + log_path, + "--plugin-configs", + str(nic_plugin_config_full_analyzer_args), + ], + check=False, + ) + + assert result.returncode in [ + 0, + 1, + 2, + ], f"Unexpected return code: {result.returncode}. 
stdout: {result.stdout[:500]!r}" + output = result.stdout + result.stderr + assert len(output) > 0 + assert "NicPlugin" in output or "nic" in output.lower() + + +def test_nic_plugin_with_minimal_config(run_cli_command, nic_plugin_config_minimal, tmp_path): + """Test NicPlugin using minimal config (default collection_args, no analysis_args).""" + assert nic_plugin_config_minimal.exists(), f"Config file not found: {nic_plugin_config_minimal}" + + log_path = str(tmp_path / "logs_nic_minimal") + result = run_cli_command( + ["--log-path", log_path, "--plugin-configs", str(nic_plugin_config_minimal)], + check=False, + ) + + assert result.returncode in [ + 0, + 1, + 2, + ], f"Unexpected return code: {result.returncode}. stdout: {result.stdout[:500]!r}" + output = result.stdout + result.stderr + assert len(output) > 0 + assert "NicPlugin" in output or "nic" in output.lower() + + +def test_nic_plugin_with_run_plugins_subcommand(run_cli_command, tmp_path): + """Test NicPlugin via run-plugins subcommand (no config).""" + log_path = str(tmp_path / "logs_nic_subcommand") + result = run_cli_command(["--log-path", log_path, "run-plugins", "NicPlugin"], check=False) + + assert result.returncode in [ + 0, + 1, + 2, + ], f"Unexpected return code: {result.returncode}. stdout: {result.stdout[:500]!r}" + output = result.stdout + result.stderr + assert len(output) > 0 + assert "NicPlugin" in output or "nic" in output.lower() + + +def test_nic_plugin_full_config_validates_analysis_args( + run_cli_command, nic_plugin_config_full_analyzer_args, tmp_path +): + """Config with all analyzer_args loads and runs without validation error.""" + assert nic_plugin_config_full_analyzer_args.exists() + + log_path = str(tmp_path / "logs_nic_validate") + result = run_cli_command( + [ + "--log-path", + log_path, + "--plugin-configs", + str(nic_plugin_config_full_analyzer_args), + ], + check=False, + ) + + assert result.returncode in [ + 0, + 1, + 2, + ], f"Unexpected return code: {result.returncode}. 
stdout: {result.stdout[:500]!r}" + output = result.stdout + result.stderr + assert "NicPlugin" in output diff --git a/test/functional/test_plugin_configs.py b/test/functional/test_plugin_configs.py index 6f0eabb9..768e9e6e 100644 --- a/test/functional/test_plugin_configs.py +++ b/test/functional/test_plugin_configs.py @@ -51,6 +51,7 @@ def plugin_config_files(fixtures_dir): "KernelPlugin": fixtures_dir / "kernel_plugin_config.json", "KernelModulePlugin": fixtures_dir / "kernel_module_plugin_config.json", "MemoryPlugin": fixtures_dir / "memory_plugin_config.json", + "NicPlugin": fixtures_dir / "niccli_plugin_config.json", "NvmePlugin": fixtures_dir / "nvme_plugin_config.json", "OsPlugin": fixtures_dir / "os_plugin_config.json", "PackagePlugin": fixtures_dir / "package_plugin_config.json", diff --git a/test/unit/plugin/test_network_collector.py b/test/unit/plugin/test_network_collector.py index f6580c16..6382adeb 100644 --- a/test/unit/plugin/test_network_collector.py +++ b/test/unit/plugin/test_network_collector.py @@ -32,20 +32,11 @@ from nodescraper.models.systeminfo import OSFamily from nodescraper.plugins.inband.network.network_collector import NetworkCollector from nodescraper.plugins.inband.network.networkdata import ( - BroadcomNicDevice, - BroadcomNicQos, EthtoolInfo, IpAddress, Neighbor, NetworkDataModel, NetworkInterface, - PensandoNicCard, - PensandoNicDcqcn, - PensandoNicEnvironment, - PensandoNicPcieAts, - PensandoNicPort, - PensandoNicQos, - PensandoNicQosScheduling, Route, RoutingRule, ) @@ -317,12 +308,6 @@ def run_sut_cmd_side_effect(cmd, **kwargs): elif "lldpcli" in cmd or "lldpctl" in cmd: # LLDP commands fail (not available) return MagicMock(exit_code=1, stdout="", command=cmd) - elif "niccli" in cmd: - # Broadcom NIC commands fail (not available) - return MagicMock(exit_code=1, stdout="", command=cmd) - elif "nicctl" in cmd: - # Pensando NIC commands fail (not available) - return MagicMock(exit_code=1, stdout="", command=cmd) return 
MagicMock(exit_code=1, stdout="", command=cmd) collector._run_sut_cmd = MagicMock(side_effect=run_sut_cmd_side_effect) @@ -336,8 +321,8 @@ def run_sut_cmd_side_effect(cmd, **kwargs): assert len(data.routes) == 3 assert len(data.rules) == 3 assert len(data.neighbors) == 2 - # Since nicctl commands fail in this test, we expect the failure message - assert "Network data collection failed" in result.message + # Ethtool/LLDP are mocked to fail; collector still reports success + assert "Network data collected successfully" in result.message def test_collect_data_addr_failure(collector, conn_mock): @@ -359,12 +344,6 @@ def run_sut_cmd_side_effect(cmd, **kwargs): elif "lldpcli" in cmd or "lldpctl" in cmd: # LLDP commands fail (not available) return MagicMock(exit_code=1, command=cmd) - elif "niccli" in cmd: - # Broadcom NIC commands fail (not available) - return MagicMock(exit_code=1, command=cmd) - elif "nicctl" in cmd: - # Pensando NIC commands fail (not available) - return MagicMock(exit_code=1, command=cmd) return MagicMock(exit_code=1, command=cmd) collector._run_sut_cmd = MagicMock(side_effect=run_sut_cmd_side_effect) @@ -589,1295 +568,6 @@ def test_network_data_model_creation(collector): assert data.ethtool_info["ethmock123"].speed == "1000mockMb/s" -# Sample Broadcom NIC command outputs for testing -NICCLI_LISTDEV_OUTPUT = """ -1 ) Broadcom BCM57608 1x400G QSFP-DD PCIe Ethernet NIC (Adp#1 Port#1) - Device Interface Name : abcd1p1 - MAC Address : 81:82:83:84:85:88 - PCI Address : 0000:22:00.0 -""" - -NICCLI_QOS_OUTPUT = """ -IEEE 8021QAZ ETS Configuration TLV: - PRIO_MAP: 0:0 1:0 2:0 3:1 4:0 5:0 6:0 7:2 - TC Bandwidth: 50% 50% 0% - TSA_MAP: 0:ets 1:ets 2:strict -IEEE 8021QAZ PFC TLV: - PFC enabled: 3 -IEEE 8021QAZ APP TLV: - APP#0: - Priority: 7 - Sel: 5 - DSCP: 48 - - APP#1: - Priority: 3 - Sel: 5 - DSCP: 26 - - APP#2: - Priority: 3 - Sel: 3 - UDP or DCCP: 4791 - -TC Rate Limit: 100% 100% 100% 0% 0% 0% 0% 0% -""" - -NICCLI_QOS_MINIMAL_OUTPUT = """IEEE 8021QAZ ETS 
Configuration TLV: - PRIO_MAP: 0:0 1:1 - TC Bandwidth: 50% 50% - TSA_MAP: 0:ets 1:strict -IEEE 8021QAZ PFC TLV: - PFC enabled: 1 -TC Rate Limit: 100% 100% -""" - -# Sample Pensando NIC command outputs for testing -NICCTL_SHOW_CARD_OUTPUT = """ ---------------------------------------------------------------------------------------------- -Id PCIe BDF ASIC F/W partition Serial number ---------------------------------------------------------------------------------------------- -1111111-4c32-3533-3330-12345000000 0000:06:00.0 test1 A ABC1234 -2222222-4c32-3533-3731-78901500000 0000:16:00.0 test2 A DEF5678 -""" - -NICCTL_SHOW_DCQCN_OUTPUT = """ -NIC : 1111111-4c32-3533-3330-12345000000 (0000:06:00.0) ------------------------------------------------------------------------------------------- - -Lif id : 1111111-4c32-3533-3330-12345000000 -ROCE device : sample - DCQCN profile id : 1 - Status : Disabled -****************************************************************************************** -""" - -NICCTL_SHOW_ENVIRONMENT_OUTPUT = """ -NIC : 1111111-4c32-3533-3330-12345000000 (0000:06:00.0) - - Power(W): - Total power drawn (pin) : 29.437 - Core power (pout1) : 12.375 - ARM power (pout2) : 0.788 - Temperature(C): - Local board temperature : 44.12 - Die temperature : 45.59 - Voltage(mV): - Input voltage : 12078 - Core voltage : 725 - Frequency(MHz): - Core frequency : 1100 - CPU frequency : 1500 - P4 stage frequency : 1500 -------------------------------------------------------------------------------------- -""" - -NICCTL_SHOW_PCIE_ATS_OUTPUT = """ -NIC : 1111111-4c32-3533-3330-12345000000 (0000:06:00.0) : Disabled -""" - -NICCTL_SHOW_PORT_OUTPUT = """ -NIC : 1111111-4c32-3533-3330-12345000000 (0000:06:00.0) - -Port : 555555a-6c40-4242-4242-000011010000 (eth1/1) - Spec: - Ifindex : 0x11010000 - Type : ETH - speed : 400G - Admin state : UP - FEC type : RS - Pause type : PFC - Number of lanes : 4 - MTU : 9216 - TX pause : enabled - RX pause : enabled - Auto negotiation : 
disabled - Status: - Physical port : 1 - Operational status : DOWN - Link FSM state : SIGNAL_DETECT - FEC type : RS - Cable type : Copper - Number of lanes : 4 - speed : 400G - Auto negotiation : disabled - MAC ID : 0 - MAC channel : 0 - MAC address : 04:90:81:4a:6c:40 - Transceiver type : QSFP_CMIS - Transceiver state : SPROM-READ - Transceiver PID : QSFP-400G-CR4 -------------------------------------------------------------------------------------- -""" - -NICCTL_SHOW_QOS_OUTPUT = """ -NIC : 1111111-4c32-3533-3330-12345000000 (0000:06:00.0) - -Port : 0490814a-6c40-4242-4242-000011010000 - - Classification type : DSCP - - DSCP-to-priority : - DSCP bitmap : 0xffffffffffffffff ==> priority : 0 - DSCP : 0-63 ==> priority : 0 - - - PFC : - PFC priority bitmap : 0x0 - PFC no-drop priorities : - - Scheduling : - -------------------------------------------- - Priority Scheduling Bandwidth Rate-limit - Type (in %age) (in Gbps) - -------------------------------------------- - 0 DWRR 0 N/A -""" - - -def test_parse_niccli_listdev_device(collector): - """Test parsing Broadcom NIC device from niccli --list_devices output""" - devices = collector._parse_niccli_listdev(NICCLI_LISTDEV_OUTPUT) - - assert len(devices) == 1 - - # Check device - device1 = devices[0] - assert device1.device_num == 1 - assert device1.model == "Broadcom BCM57608 1x400G QSFP-DD PCIe Ethernet NIC" - assert device1.adapter_port == "Adp#1 Port#1" - assert device1.interface_name == "abcd1p1" - assert device1.mac_address == "81:82:83:84:85:88" - assert device1.pci_address == "0000:22:00.0" - - -def test_parse_niccli_listdev_empty_output(collector): - """Test parsing empty niccli --list_devices output""" - devices = collector._parse_niccli_listdev("") - - assert len(devices) == 0 - - -def test_parse_niccli_listdev_malformed_output(collector): - """Test parsing malformed niccli --list_devices output gracefully""" - malformed = """some random text -not a valid device line -123 invalid format -""" - - devices = 
collector._parse_niccli_listdev(malformed) - - # Should handle gracefully, return empty list or skip invalid lines - assert isinstance(devices, list) - - -def test_parse_niccli_qos_complete(collector): - """Test parsing complete Broadcom NIC QoS output with all fields""" - qos = collector._parse_niccli_qos(1, NICCLI_QOS_OUTPUT) - - assert qos.device_num == 1 - assert qos.raw_output == NICCLI_QOS_OUTPUT - - # Check PRIO_MAP - assert len(qos.prio_map) == 8 - assert qos.prio_map[0] == 0 - assert qos.prio_map[1] == 0 - assert qos.prio_map[3] == 1 - assert qos.prio_map[7] == 2 - - # Check TC Bandwidth - assert len(qos.tc_bandwidth) == 3 - assert qos.tc_bandwidth[0] == 50 - assert qos.tc_bandwidth[1] == 50 - assert qos.tc_bandwidth[2] == 0 - - # Check TSA_MAP - assert len(qos.tsa_map) == 3 - assert qos.tsa_map[0] == "ets" - assert qos.tsa_map[1] == "ets" - assert qos.tsa_map[2] == "strict" - - # Check PFC enabled - assert qos.pfc_enabled == 3 - - # Check APP entries - assert len(qos.app_entries) == 3 - - # Check APP#0 - app0 = qos.app_entries[0] - assert app0.priority == 7 - assert app0.sel == 5 - assert app0.dscp == 48 - assert app0.protocol is None - assert app0.port is None - - # Check APP#1 - app1 = qos.app_entries[1] - assert app1.priority == 3 - assert app1.sel == 5 - assert app1.dscp == 26 - - # Check APP#2 (with protocol and port) - app2 = qos.app_entries[2] - assert app2.priority == 3 - assert app2.sel == 3 - assert app2.dscp is None - assert app2.protocol == "UDP or DCCP" - assert app2.port == 4791 - - # Check TC Rate Limit - assert len(qos.tc_rate_limit) == 8 - assert qos.tc_rate_limit[0] == 100 - assert qos.tc_rate_limit[1] == 100 - assert qos.tc_rate_limit[2] == 100 - assert qos.tc_rate_limit[3] == 0 - assert qos.tc_rate_limit[7] == 0 - - -def test_parse_niccli_qos_empty_output(collector): - """Test parsing empty QoS output""" - qos = collector._parse_niccli_qos(1, "") - - assert qos.device_num == 1 - assert qos.raw_output == "" - assert len(qos.prio_map) == 
0 - assert len(qos.tc_bandwidth) == 0 - assert len(qos.tsa_map) == 0 - assert qos.pfc_enabled is None - assert len(qos.app_entries) == 0 - assert len(qos.tc_rate_limit) == 0 - - -def test_parse_niccli_qos_multiple_app_protocols(collector): - """Test parsing QoS with APP entries having different protocols""" - qos_multi_protocol = """IEEE 8021QAZ ETS Configuration TLV: - PRIO_MAP: 0:0 - TC Bandwidth: 100% - TSA_MAP: 0:ets -IEEE 8021QAZ PFC TLV: - PFC enabled: 0 -IEEE 8021QAZ APP TLV: - APP#0: - Priority: 5 - Sel: 3 - TCP: 8080 - - APP#1: - Priority: 6 - Sel: 3 - UDP: 9000 - -TC Rate Limit: 100% -""" - - qos = collector._parse_niccli_qos(3, qos_multi_protocol) - - assert len(qos.app_entries) == 2 - - # Check TCP entry - app0 = qos.app_entries[0] - assert app0.priority == 5 - assert app0.sel == 3 - assert app0.protocol == "TCP" - assert app0.port == 8080 - - # Check UDP entry - app1 = qos.app_entries[1] - assert app1.priority == 6 - assert app1.sel == 3 - assert app1.protocol == "UDP" - assert app1.port == 9000 - - -def test_parse_niccli_qos_malformed_values(collector): - """Test parsing QoS output with malformed values gracefully""" - malformed = """IEEE 8021QAZ ETS Configuration TLV: - PRIO_MAP: 0:invalid 1:1 bad:data - TC Bandwidth: 50% invalid 50% - TSA_MAP: 0:ets bad:value 1:strict -IEEE 8021QAZ PFC TLV: - PFC enabled: not_a_number -TC Rate Limit: 100% bad% 100% -""" - - qos = collector._parse_niccli_qos(1, malformed) - - # Should skip invalid entries but parse valid ones - assert qos.device_num == 1 - # Should have parsed valid prio_map entry (1:1) - assert 1 in qos.prio_map - assert qos.prio_map[1] == 1 - # Should have parsed valid bandwidth entries - assert 50 in qos.tc_bandwidth - # Should have parsed valid tsa_map entries - assert qos.tsa_map.get(0) == "ets" - assert qos.tsa_map.get(1) == "strict" - # PFC should be None due to invalid number - assert qos.pfc_enabled is None - - -def test_network_data_model_with_broadcom_nic(collector): - """Test creating 
NetworkDataModel with Broadcom NIC data""" - device = BroadcomNicDevice( - device_num=1, - model="Broadcom BCM57608 1x400G QSFP-DD PCIe Ethernet NIC", - adapter_port="Adp#1 Port#1", - interface_name="benic1p1", - mac_address="8C:84:74:37:C3:70", - pci_address="0000:06:00.0", - ) - - qos = BroadcomNicQos( - device_num=1, - raw_output="test output", - prio_map={0: 0, 1: 1}, - tc_bandwidth=[50, 50], - tsa_map={0: "ets", 1: "strict"}, - pfc_enabled=3, - tc_rate_limit=[100, 100], - ) - - data = NetworkDataModel( - interfaces=[], - routes=[], - rules=[], - neighbors=[], - ethtool_info={}, - broadcom_nic_devices=[device], - broadcom_nic_qos={1: qos}, - ) - - assert len(data.broadcom_nic_devices) == 1 - assert len(data.broadcom_nic_qos) == 1 - assert data.broadcom_nic_devices[0].device_num == 1 - assert data.broadcom_nic_devices[0].interface_name == "benic1p1" - assert data.broadcom_nic_qos[1].device_num == 1 - assert data.broadcom_nic_qos[1].pfc_enabled == 3 - - -def test_parse_nicctl_show_card_multiple_cards(collector): - """Test parsing multiple Pensando NIC cards from nicctl show card output""" - cards = collector._parse_nicctl_card(NICCTL_SHOW_CARD_OUTPUT) - - assert len(cards) == 2 - - # Check first card - card1 = cards[0] - assert card1.id == "1111111-4c32-3533-3330-12345000000" - assert card1.pcie_bdf == "0000:06:00.0" - assert card1.asic == "test1" - assert card1.fw_partition == "A" - assert card1.serial_number == "ABC1234" - - # Check second card - card2 = cards[1] - assert card2.id == "2222222-4c32-3533-3731-78901500000" - assert card2.pcie_bdf == "0000:16:00.0" - assert card2.asic == "test2" - assert card2.fw_partition == "A" - assert card2.serial_number == "DEF5678" - - -def test_parse_nicctl_show_card_empty_output(collector): - """Test parsing empty nicctl show card output""" - cards = collector._parse_nicctl_card("") - - assert len(cards) == 0 - - -def test_parse_nicctl_show_card_partial_fields(collector): - """Test parsing nicctl show card output with 
partial fields""" - partial_output = """ ---------------------------------------------------------------------------------------------- -Id PCIe BDF ASIC F/W partition Serial number ---------------------------------------------------------------------------------------------- -42424650-4c32-3533-3330-323934000000 0000:06:00.0 -42424650-4c32-3533-3731-304535000000 0000:16:00.0 salina -""" - - cards = collector._parse_nicctl_card(partial_output) - - assert len(cards) == 2 - - # First card with only ID and PCIe BDF - card1 = cards[0] - assert card1.id == "42424650-4c32-3533-3330-323934000000" - assert card1.pcie_bdf == "0000:06:00.0" - assert card1.asic is None - assert card1.fw_partition is None - assert card1.serial_number is None - - # Second card with ID, PCIe BDF, and ASIC - card2 = cards[1] - assert card2.id == "42424650-4c32-3533-3731-304535000000" - assert card2.pcie_bdf == "0000:16:00.0" - assert card2.asic == "salina" - assert card2.fw_partition is None - assert card2.serial_number is None - - -def test_parse_nicctl_show_card_malformed_output(collector): - """Test parsing malformed nicctl show card output gracefully""" - malformed = """some random text -not a valid card line -123 invalid format -""" - - cards = collector._parse_nicctl_card(malformed) - - # Should handle gracefully, return empty list or skip invalid lines - assert isinstance(cards, list) - # May parse some invalid entries, but should not crash - - -def test_network_data_model_with_pensando_nic(collector): - """Test creating NetworkDataModel with Pensando NIC data""" - card1 = PensandoNicCard( - id="42424650-4c32-3533-3330-323934000000", - pcie_bdf="0000:06:00.0", - asic="salina", - fw_partition="A", - serial_number="FPL25330294", - ) - - card2 = PensandoNicCard( - id="42424650-4c32-3533-3731-304535000000", - pcie_bdf="0000:16:00.0", - asic="salina", - fw_partition="A", - serial_number="FPL253710E5", - ) - - data = NetworkDataModel( - interfaces=[], - routes=[], - rules=[], - neighbors=[], - 
ethtool_info={}, - pensando_nic_cards=[card1, card2], - ) - - assert len(data.pensando_nic_cards) == 2 - assert data.pensando_nic_cards[0].id == "42424650-4c32-3533-3330-323934000000" - assert data.pensando_nic_cards[0].pcie_bdf == "0000:06:00.0" - assert data.pensando_nic_cards[0].asic == "salina" - assert data.pensando_nic_cards[1].serial_number == "FPL253710E5" - - -def test_collect_pensando_nic_success(collector, conn_mock): - """Test successful collection of Pensando NIC data""" - collector.system_info.os_family = OSFamily.LINUX - - # Mock successful nicctl command execution - def run_sut_cmd_side_effect(cmd, **kwargs): - if "nicctl show card" in cmd: - return MagicMock(exit_code=0, stdout=NICCTL_SHOW_CARD_OUTPUT, command=cmd) - elif "nicctl show dcqcn" in cmd: - return MagicMock(exit_code=0, stdout=NICCTL_SHOW_DCQCN_OUTPUT, command=cmd) - elif "nicctl show environment" in cmd: - return MagicMock(exit_code=0, stdout=NICCTL_SHOW_ENVIRONMENT_OUTPUT, command=cmd) - elif "nicctl show pcie ats" in cmd: - return MagicMock(exit_code=0, stdout=NICCTL_SHOW_PCIE_ATS_OUTPUT, command=cmd) - elif "nicctl show port" in cmd: - return MagicMock(exit_code=0, stdout=NICCTL_SHOW_PORT_OUTPUT, command=cmd) - elif "nicctl show qos" in cmd: - return MagicMock(exit_code=0, stdout=NICCTL_SHOW_QOS_OUTPUT, command=cmd) - elif "nicctl show rdma statistics" in cmd: - return MagicMock(exit_code=0, stdout=NICCTL_SHOW_RDMA_STATISTICS_OUTPUT, command=cmd) - elif "nicctl show version host-software" in cmd: - return MagicMock( - exit_code=0, stdout=NICCTL_SHOW_VERSION_HOST_SOFTWARE_OUTPUT, command=cmd - ) - elif "nicctl show version firmware" in cmd: - return MagicMock(exit_code=0, stdout=NICCTL_SHOW_VERSION_FIRMWARE_OUTPUT, command=cmd) - elif "nicctl" in cmd: - # Other nicctl commands succeed but return empty - return MagicMock(exit_code=0, stdout="", command=cmd) - return MagicMock(exit_code=1, stdout="", command=cmd) - - collector._run_sut_cmd = 
MagicMock(side_effect=run_sut_cmd_side_effect) - - ( - cards, - dcqcn_entries, - environment_entries, - pcie_ats_entries, - port_entries, - qos_entries, - rdma_statistics_entries, - version_host_software, - version_firmware_entries, - uncollected_commands, - ) = collector._collect_pensando_nic_info() - - # All commands succeeded, so uncollected_commands should be empty - assert len(uncollected_commands) == 0 - - assert len(cards) == 2 - assert cards[0].id == "1111111-4c32-3533-3330-12345000000" - assert cards[0].pcie_bdf == "0000:06:00.0" - assert cards[0].asic == "test1" - assert cards[0].serial_number == "ABC1234" - - assert len(dcqcn_entries) == 1 - assert dcqcn_entries[0].nic_id == "1111111-4c32-3533-3330-12345000000" - assert dcqcn_entries[0].pcie_bdf == "0000:06:00.0" - - assert len(environment_entries) == 1 - assert environment_entries[0].nic_id == "1111111-4c32-3533-3330-12345000000" - assert environment_entries[0].pcie_bdf == "0000:06:00.0" - - assert len(pcie_ats_entries) == 1 - assert pcie_ats_entries[0].nic_id == "1111111-4c32-3533-3330-12345000000" - assert pcie_ats_entries[0].pcie_bdf == "0000:06:00.0" - assert pcie_ats_entries[0].status == "Disabled" - - assert len(port_entries) == 1 - assert port_entries[0].nic_id == "1111111-4c32-3533-3330-12345000000" - assert port_entries[0].pcie_bdf == "0000:06:00.0" - assert port_entries[0].port_name == "eth1/1" - - assert len(qos_entries) == 1 - assert qos_entries[0].nic_id == "1111111-4c32-3533-3330-12345000000" - assert qos_entries[0].pcie_bdf == "0000:06:00.0" - assert qos_entries[0].port_id == "0490814a-6c40-4242-4242-000011010000" - - assert len(rdma_statistics_entries) == 2 - assert rdma_statistics_entries[0].nic_id == "42424650-4c32-3533-3330-323934000000" - assert rdma_statistics_entries[0].pcie_bdf == "0000:06:00.0" - assert len(rdma_statistics_entries[0].statistics) == 2 - - assert version_host_software is not None - assert version_host_software.nicctl == "1.117.1-a-63" - assert 
version_host_software.ipc_driver == "1.117.1.a.63" - assert version_host_software.ionic_driver == "25.08.4.004" - - assert len(version_firmware_entries) == 2 - assert version_firmware_entries[0].nic_id == "42424650-4c32-3533-3330-323934000000" - assert version_firmware_entries[0].pcie_bdf == "0000:06:00.0" - assert version_firmware_entries[0].cpld == "3.16 (primary)" - - -def test_parse_nicctl_show_dcqcn_multiple_entries(collector): - """Test parsing Pensando NIC DCQCN entry from nicctl show dcqcn output""" - dcqcn_entries = collector._parse_nicctl_dcqcn(NICCTL_SHOW_DCQCN_OUTPUT) - - assert len(dcqcn_entries) == 1 - - # Check entry - entry1 = dcqcn_entries[0] - assert entry1.nic_id == "1111111-4c32-3533-3330-12345000000" - assert entry1.pcie_bdf == "0000:06:00.0" - assert entry1.lif_id == "1111111-4c32-3533-3330-12345000000" - assert entry1.roce_device == "sample" - assert entry1.dcqcn_profile_id == "1" - assert entry1.status == "Disabled" - - -def test_parse_nicctl_show_dcqcn_empty_output(collector): - """Test parsing empty nicctl show dcqcn output""" - dcqcn_entries = collector._parse_nicctl_dcqcn("") - - assert len(dcqcn_entries) == 0 - - -def test_parse_nicctl_show_dcqcn_partial_fields(collector): - """Test parsing nicctl show dcqcn output with partial fields""" - partial_output = """ -NIC : 42424650-4c32-3533-3330-323934000000 (0000:06:00.0) ------------------------------------------------------------------------------------------- - -Lif id : 43000070-0100-0000-4242-0490814a6c40 -****************************************************************************************** -""" - - dcqcn_entries = collector._parse_nicctl_dcqcn(partial_output) - - assert len(dcqcn_entries) == 1 - - # Entry with only NIC ID, PCIe BDF, and Lif ID - entry1 = dcqcn_entries[0] - assert entry1.nic_id == "42424650-4c32-3533-3330-323934000000" - assert entry1.pcie_bdf == "0000:06:00.0" - assert entry1.lif_id == "43000070-0100-0000-4242-0490814a6c40" - assert entry1.roce_device is None - 
assert entry1.dcqcn_profile_id is None - assert entry1.status is None - - -def test_parse_nicctl_show_dcqcn_malformed_output(collector): - """Test parsing malformed nicctl show dcqcn output gracefully""" - malformed = """some random text -not a valid dcqcn line -123 invalid format -""" - - dcqcn_entries = collector._parse_nicctl_dcqcn(malformed) - - # Should handle gracefully, return empty list - assert isinstance(dcqcn_entries, list) - assert len(dcqcn_entries) == 0 - - -def test_network_data_model_with_pensando_nic_dcqcn(collector): - """Test creating NetworkDataModel with Pensando NIC DCQCN data""" - dcqcn1 = PensandoNicDcqcn( - nic_id="42424650-4c32-3533-3330-323934000000", - pcie_bdf="0000:06:00.0", - lif_id="43000070-0100-0000-4242-0490814a6c40", - roce_device="rocep9s0", - dcqcn_profile_id="1", - status="Disabled", - ) - - dcqcn2 = PensandoNicDcqcn( - nic_id="42424650-4c32-3533-3731-304535000000", - pcie_bdf="0000:16:00.0", - lif_id="43000070-0100-0000-4242-0490815cce50", - roce_device="rocep25s0", - dcqcn_profile_id="1", - status="Disabled", - ) - - data = NetworkDataModel( - interfaces=[], - routes=[], - rules=[], - neighbors=[], - ethtool_info={}, - pensando_nic_dcqcn=[dcqcn1, dcqcn2], - ) - - assert len(data.pensando_nic_dcqcn) == 2 - assert data.pensando_nic_dcqcn[0].nic_id == "42424650-4c32-3533-3330-323934000000" - assert data.pensando_nic_dcqcn[0].pcie_bdf == "0000:06:00.0" - assert data.pensando_nic_dcqcn[0].roce_device == "rocep9s0" - assert data.pensando_nic_dcqcn[1].lif_id == "43000070-0100-0000-4242-0490815cce50" - - -def test_parse_nicctl_show_environment_multiple_entries(collector): - """Test parsing Pensando NIC environment entry from nicctl show environment output""" - environment_entries = collector._parse_nicctl_environment(NICCTL_SHOW_ENVIRONMENT_OUTPUT) - - assert len(environment_entries) == 1 - - # Check entry - entry1 = environment_entries[0] - assert entry1.nic_id == "1111111-4c32-3533-3330-12345000000" - assert entry1.pcie_bdf == 
"0000:06:00.0" - assert entry1.total_power_drawn == 29.437 - assert entry1.core_power == 12.375 - assert entry1.arm_power == 0.788 - assert entry1.local_board_temperature == 44.12 - assert entry1.die_temperature == 45.59 - assert entry1.input_voltage == 12078 - assert entry1.core_voltage == 725 - assert entry1.core_frequency == 1100 - assert entry1.cpu_frequency == 1500 - assert entry1.p4_stage_frequency == 1500 - - -def test_parse_nicctl_show_environment_empty_output(collector): - """Test parsing empty nicctl show environment output""" - environment_entries = collector._parse_nicctl_environment("") - - assert len(environment_entries) == 0 - - -def test_parse_nicctl_show_environment_partial_fields(collector): - """Test parsing nicctl show environment output with partial fields""" - partial_output = """ -NIC : 42424650-4c32-3533-3330-323934000000 (0000:06:00.0) - - Power(W): - Total power drawn (pin) : 29.437 - Temperature(C): - Local board temperature : 44.12 -------------------------------------------------------------------------------------- -""" - - environment_entries = collector._parse_nicctl_environment(partial_output) - - assert len(environment_entries) == 1 - - # Entry with only some fields - entry1 = environment_entries[0] - assert entry1.nic_id == "42424650-4c32-3533-3330-323934000000" - assert entry1.pcie_bdf == "0000:06:00.0" - assert entry1.total_power_drawn == 29.437 - assert entry1.local_board_temperature == 44.12 - assert entry1.core_power is None - assert entry1.die_temperature is None - assert entry1.input_voltage is None - - -def test_parse_nicctl_show_environment_malformed_output(collector): - """Test parsing malformed nicctl show environment output gracefully""" - malformed = """some random text -not a valid environment line -123 invalid format -""" - - environment_entries = collector._parse_nicctl_environment(malformed) - - # Should handle gracefully, return empty list - assert isinstance(environment_entries, list) - assert 
len(environment_entries) == 0 - - -def test_network_data_model_with_pensando_nic_environment(collector): - """Test creating NetworkDataModel with Pensando NIC environment data""" - env1 = PensandoNicEnvironment( - nic_id="42424650-4c32-3533-3330-323934000000", - pcie_bdf="0000:06:00.0", - total_power_drawn=29.437, - core_power=12.375, - arm_power=0.788, - local_board_temperature=44.12, - die_temperature=45.59, - input_voltage=12078, - core_voltage=725, - core_frequency=1100, - cpu_frequency=1500, - p4_stage_frequency=1500, - ) - - env2 = PensandoNicEnvironment( - nic_id="42424650-4c32-3533-3731-304535000000", - pcie_bdf="0000:16:00.0", - total_power_drawn=28.968, - core_power=12.031, - arm_power=0.292, - local_board_temperature=42.62, - die_temperature=42.28, - input_voltage=12078, - core_voltage=725, - core_frequency=1100, - cpu_frequency=1500, - p4_stage_frequency=1500, - ) - - data = NetworkDataModel( - interfaces=[], - routes=[], - rules=[], - neighbors=[], - ethtool_info={}, - pensando_nic_environment=[env1, env2], - ) - - assert len(data.pensando_nic_environment) == 2 - assert data.pensando_nic_environment[0].nic_id == "42424650-4c32-3533-3330-323934000000" - assert data.pensando_nic_environment[0].pcie_bdf == "0000:06:00.0" - assert data.pensando_nic_environment[0].total_power_drawn == 29.437 - assert data.pensando_nic_environment[0].die_temperature == 45.59 - assert data.pensando_nic_environment[1].core_frequency == 1100 - - -def test_parse_nicctl_show_pcie_ats_multiple_entries(collector): - """Test parsing Pensando NIC PCIe ATS entry from nicctl show pcie ats output""" - pcie_ats_entries = collector._parse_nicctl_pcie_ats(NICCTL_SHOW_PCIE_ATS_OUTPUT) - - assert len(pcie_ats_entries) == 1 - - # Check entry - entry1 = pcie_ats_entries[0] - assert entry1.nic_id == "1111111-4c32-3533-3330-12345000000" - assert entry1.pcie_bdf == "0000:06:00.0" - assert entry1.status == "Disabled" - - -def test_parse_nicctl_show_pcie_ats_empty_output(collector): - """Test 
parsing empty nicctl show pcie ats output""" - pcie_ats_entries = collector._parse_nicctl_pcie_ats("") - - assert len(pcie_ats_entries) == 0 - - -def test_parse_nicctl_show_pcie_ats_enabled(collector): - """Test parsing nicctl show pcie ats output with Enabled status""" - enabled_output = """ -NIC : 42424650-4c32-3533-3330-323934000000 (0000:06:00.0) : Enabled -NIC : 42424650-4c32-3533-3731-304535000000 (0000:16:00.0) : Disabled -""" - - pcie_ats_entries = collector._parse_nicctl_pcie_ats(enabled_output) - - assert len(pcie_ats_entries) == 2 - assert pcie_ats_entries[0].status == "Enabled" - assert pcie_ats_entries[1].status == "Disabled" - - -def test_parse_nicctl_show_pcie_ats_malformed_output(collector): - """Test parsing malformed nicctl show pcie ats output gracefully""" - malformed = """some random text -not a valid pcie ats line -123 invalid format -""" - - pcie_ats_entries = collector._parse_nicctl_pcie_ats(malformed) - - # Should handle gracefully, return empty list - assert isinstance(pcie_ats_entries, list) - assert len(pcie_ats_entries) == 0 - - -def test_network_data_model_with_pensando_nic_pcie_ats(collector): - """Test creating NetworkDataModel with Pensando NIC PCIe ATS data""" - ats1 = PensandoNicPcieAts( - nic_id="42424650-4c32-3533-3330-323934000000", - pcie_bdf="0000:06:00.0", - status="Disabled", - ) - - ats2 = PensandoNicPcieAts( - nic_id="42424650-4c32-3533-3731-304535000000", - pcie_bdf="0000:16:00.0", - status="Enabled", - ) - - data = NetworkDataModel( - interfaces=[], - routes=[], - rules=[], - neighbors=[], - ethtool_info={}, - pensando_nic_pcie_ats=[ats1, ats2], - ) - - assert len(data.pensando_nic_pcie_ats) == 2 - assert data.pensando_nic_pcie_ats[0].nic_id == "42424650-4c32-3533-3330-323934000000" - assert data.pensando_nic_pcie_ats[0].pcie_bdf == "0000:06:00.0" - assert data.pensando_nic_pcie_ats[0].status == "Disabled" - assert data.pensando_nic_pcie_ats[1].status == "Enabled" - - -def 
test_parse_nicctl_show_port_multiple_entries(collector): - """Test parsing Pensando NIC port entry from nicctl show port output""" - port_entries = collector._parse_nicctl_port(NICCTL_SHOW_PORT_OUTPUT) - - assert len(port_entries) == 1 - - # Check entry - entry1 = port_entries[0] - assert entry1.nic_id == "1111111-4c32-3533-3330-12345000000" - assert entry1.pcie_bdf == "0000:06:00.0" - assert entry1.port_id == "555555a-6c40-4242-4242-000011010000" - assert entry1.port_name == "eth1/1" - # Spec fields - assert entry1.spec_ifindex == "0x11010000" - assert entry1.spec_type == "ETH" - assert entry1.spec_speed == "400G" - assert entry1.spec_admin_state == "UP" - assert entry1.spec_fec_type == "RS" - assert entry1.spec_pause_type == "PFC" - assert entry1.spec_num_lanes == 4 - assert entry1.spec_mtu == 9216 - assert entry1.spec_tx_pause == "enabled" - assert entry1.spec_rx_pause == "enabled" - assert entry1.spec_auto_negotiation == "disabled" - # Status fields - assert entry1.status_physical_port == 1 - assert entry1.status_operational_status == "DOWN" - assert entry1.status_link_fsm_state == "SIGNAL_DETECT" - assert entry1.status_fec_type == "RS" - assert entry1.status_cable_type == "Copper" - assert entry1.status_num_lanes == 4 - assert entry1.status_speed == "400G" - assert entry1.status_auto_negotiation == "disabled" - assert entry1.status_mac_id == 0 - assert entry1.status_mac_channel == 0 - assert entry1.status_mac_address == "04:90:81:4a:6c:40" - assert entry1.status_transceiver_type == "QSFP_CMIS" - assert entry1.status_transceiver_state == "SPROM-READ" - assert entry1.status_transceiver_pid == "QSFP-400G-CR4" - - -def test_parse_nicctl_show_port_empty_output(collector): - """Test parsing empty nicctl show port output""" - port_entries = collector._parse_nicctl_port("") - - assert len(port_entries) == 0 - - -def test_parse_nicctl_show_port_partial_fields(collector): - """Test parsing nicctl show port output with partial fields""" - partial_output = """ -NIC : 
42424650-4c32-3533-3330-323934000000 (0000:06:00.0) - -Port : 0490814a-6c40-4242-4242-000011010000 (eth1/1) - Spec: - speed : 400G - Admin state : UP - Status: - Operational status : DOWN -------------------------------------------------------------------------------------- -""" - - port_entries = collector._parse_nicctl_port(partial_output) - - assert len(port_entries) == 1 - - # Entry with only some fields - entry1 = port_entries[0] - assert entry1.nic_id == "42424650-4c32-3533-3330-323934000000" - assert entry1.pcie_bdf == "0000:06:00.0" - assert entry1.port_name == "eth1/1" - assert entry1.spec_speed == "400G" - assert entry1.spec_admin_state == "UP" - assert entry1.status_operational_status == "DOWN" - assert entry1.spec_mtu is None - assert entry1.status_mac_address is None - - -def test_parse_nicctl_show_port_malformed_output(collector): - """Test parsing malformed nicctl show port output gracefully""" - malformed = """some random text -not a valid port line -123 invalid format -""" - - port_entries = collector._parse_nicctl_port(malformed) - - # Should handle gracefully, return empty list - assert isinstance(port_entries, list) - assert len(port_entries) == 0 - - -def test_network_data_model_with_pensando_nic_port(collector): - """Test creating NetworkDataModel with Pensando NIC port data""" - port1 = PensandoNicPort( - nic_id="42424650-4c32-3533-3330-323934000000", - pcie_bdf="0000:06:00.0", - port_id="0490814a-6c40-4242-4242-000011010000", - port_name="eth1/1", - spec_speed="400G", - spec_admin_state="UP", - spec_mtu=9216, - status_operational_status="DOWN", - status_mac_address="04:90:81:4a:6c:40", - ) - - port2 = PensandoNicPort( - nic_id="42424650-4c32-3533-3731-304535000000", - pcie_bdf="0000:16:00.0", - port_id="0490815c-ce50-4242-4242-000011010000", - port_name="eth1/1", - spec_speed="400G", - spec_admin_state="UP", - spec_mtu=9216, - status_operational_status="UP", - status_mac_address="04:90:81:5c:ce:50", - ) - - data = NetworkDataModel( - 
interfaces=[], - routes=[], - rules=[], - neighbors=[], - ethtool_info={}, - pensando_nic_ports=[port1, port2], - ) - - assert len(data.pensando_nic_ports) == 2 - assert data.pensando_nic_ports[0].nic_id == "42424650-4c32-3533-3330-323934000000" - assert data.pensando_nic_ports[0].port_name == "eth1/1" - assert data.pensando_nic_ports[0].spec_speed == "400G" - assert data.pensando_nic_ports[0].status_mac_address == "04:90:81:4a:6c:40" - assert data.pensando_nic_ports[1].status_operational_status == "UP" - - -def test_parse_nicctl_show_qos_multiple_entries(collector): - """Test parsing Pensando NIC QoS entry from nicctl show qos output""" - qos_entries = collector._parse_nicctl_qos(NICCTL_SHOW_QOS_OUTPUT) - - assert len(qos_entries) == 1 - - # Check entry - entry1 = qos_entries[0] - assert entry1.nic_id == "1111111-4c32-3533-3330-12345000000" - assert entry1.pcie_bdf == "0000:06:00.0" - assert entry1.port_id == "0490814a-6c40-4242-4242-000011010000" - assert entry1.classification_type == "DSCP" - assert entry1.dscp_bitmap == "0xffffffffffffffff" - assert entry1.dscp_range == "0-63" - assert entry1.dscp_priority == 0 - assert entry1.pfc_priority_bitmap == "0x0" - assert entry1.pfc_no_drop_priorities == "" - assert len(entry1.scheduling) == 1 - assert entry1.scheduling[0].priority == 0 - assert entry1.scheduling[0].scheduling_type == "DWRR" - assert entry1.scheduling[0].bandwidth == 0 - assert entry1.scheduling[0].rate_limit == "N/A" - - -def test_parse_nicctl_show_qos_empty_output(collector): - """Test parsing empty nicctl show qos output""" - qos_entries = collector._parse_nicctl_qos("") - - assert len(qos_entries) == 0 - - -def test_parse_nicctl_show_qos_malformed_output(collector): - """Test parsing malformed nicctl show qos output gracefully""" - malformed = """some random text -not a valid qos line -123 invalid format -""" - - qos_entries = collector._parse_nicctl_qos(malformed) - - # Should handle gracefully, return empty list - assert isinstance(qos_entries, 
list) - assert len(qos_entries) == 0 - - -def test_network_data_model_with_pensando_nic_qos(collector): - """Test creating NetworkDataModel with Pensando NIC QoS data""" - sched1 = PensandoNicQosScheduling( - priority=0, - scheduling_type="DWRR", - bandwidth=0, - rate_limit="N/A", - ) - - qos1 = PensandoNicQos( - nic_id="42424650-4c32-3533-3330-323934000000", - pcie_bdf="0000:06:00.0", - port_id="0490814a-6c40-4242-4242-000011010000", - classification_type="DSCP", - dscp_bitmap="0xffffffffffffffff", - dscp_range="0-63", - dscp_priority=0, - pfc_priority_bitmap="0x0", - pfc_no_drop_priorities="", - scheduling=[sched1], - ) - - qos2 = PensandoNicQos( - nic_id="42424650-4c32-3533-3731-304535000000", - pcie_bdf="0000:16:00.0", - port_id="0490815c-ce50-4242-4242-000011010000", - classification_type="DSCP", - ) - - data = NetworkDataModel( - interfaces=[], - routes=[], - rules=[], - neighbors=[], - ethtool_info={}, - pensando_nic_qos=[qos1, qos2], - ) - - assert len(data.pensando_nic_qos) == 2 - assert data.pensando_nic_qos[0].nic_id == "42424650-4c32-3533-3330-323934000000" - assert data.pensando_nic_qos[0].port_id == "0490814a-6c40-4242-4242-000011010000" - assert data.pensando_nic_qos[0].classification_type == "DSCP" - assert len(data.pensando_nic_qos[0].scheduling) == 1 - assert data.pensando_nic_qos[1].nic_id == "42424650-4c32-3533-3731-304535000000" - - -# Mock output for 'nicctl show rdma statistics' -NICCTL_SHOW_RDMA_STATISTICS_OUTPUT = """NIC : 42424650-4c32-3533-3330-323934000000 (0000:06:00.0) - ------------------------------------------------------------- -Name Count ------------------------------------------------------------- -Queue pair create 1 -Completion queue create 2 - -NIC : 42424650-4c32-3533-3731-304535000000 (0000:16:00.0) - ------------------------------------------------------------- -Name Count ------------------------------------------------------------- -Queue pair create 1 -Completion queue create 2 -""" - - -def 
test_parse_nicctl_show_rdma_statistics_multiple_entries(collector): - """Test parsing multiple NIC RDMA statistics entries.""" - entries = collector._parse_nicctl_rdma_statistics(NICCTL_SHOW_RDMA_STATISTICS_OUTPUT) - - assert len(entries) == 2 - - # Check first entry - assert entries[0].nic_id == "42424650-4c32-3533-3330-323934000000" - assert entries[0].pcie_bdf == "0000:06:00.0" - assert len(entries[0].statistics) == 2 - assert entries[0].statistics[0].name == "Queue pair create" - assert entries[0].statistics[0].count == 1 - assert entries[0].statistics[1].name == "Completion queue create" - assert entries[0].statistics[1].count == 2 - - # Check second entry - assert entries[1].nic_id == "42424650-4c32-3533-3731-304535000000" - assert entries[1].pcie_bdf == "0000:16:00.0" - assert len(entries[1].statistics) == 2 - assert entries[1].statistics[0].name == "Queue pair create" - assert entries[1].statistics[0].count == 1 - assert entries[1].statistics[1].name == "Completion queue create" - assert entries[1].statistics[1].count == 2 - - -def test_parse_nicctl_show_rdma_statistics_empty_output(collector): - """Test parsing empty RDMA statistics output.""" - entries = collector._parse_nicctl_rdma_statistics("") - assert len(entries) == 0 - - -# Mock output for 'nicctl show version host-software' -NICCTL_SHOW_VERSION_HOST_SOFTWARE_OUTPUT = """nicctl : 1.117.1-a-63 -IPC driver : 1.117.1.a.63 -ionic driver : 25.08.4.004 -""" - - -def test_parse_nicctl_show_version_host_software(collector): - """Test parsing host software version.""" - version = collector._parse_nicctl_version_host_software( - NICCTL_SHOW_VERSION_HOST_SOFTWARE_OUTPUT - ) - - assert version is not None - assert version.nicctl == "1.117.1-a-63" - assert version.ipc_driver == "1.117.1.a.63" - assert version.ionic_driver == "25.08.4.004" - - -def test_parse_nicctl_show_version_host_software_empty_output(collector): - """Test parsing empty host software version output.""" - version = 
collector._parse_nicctl_version_host_software("") - assert version is None - - -# Mock output for 'nicctl show version firmware' -NICCTL_SHOW_VERSION_FIRMWARE_OUTPUT = """NIC : 42424650-4c32-3533-3330-323934000000 (0000:06:00.0) - -CPLD : 3.16 (primary) -Boot0 : 21 -Uboot-A : 1.117.1-a-63 -Firmware-A : 1.117.1-a-63 -Device config-A : device_config_rdma_1x400G/1.0.0 -------------------------------------------------------------------------------------- - -NIC : 42424650-4c32-3533-3731-304535000000 (0000:16:00.0) - -CPLD : 3.16 (primary) -Boot0 : 21 -Uboot-A : 1.117.1-a-63 -Firmware-A : 1.117.1-a-63 -Device config-A : device_config_rdma_1x400G/1.0.0 -------------------------------------------------------------------------------------- -""" - - -def test_parse_nicctl_show_version_firmware_multiple_entries(collector): - """Test parsing multiple NIC firmware version entries.""" - entries = collector._parse_nicctl_version_firmware(NICCTL_SHOW_VERSION_FIRMWARE_OUTPUT) - - assert len(entries) == 2 - - # Check first entry - assert entries[0].nic_id == "42424650-4c32-3533-3330-323934000000" - assert entries[0].pcie_bdf == "0000:06:00.0" - assert entries[0].cpld == "3.16 (primary)" - assert entries[0].boot0 == "21" - assert entries[0].uboot_a == "1.117.1-a-63" - assert entries[0].firmware_a == "1.117.1-a-63" - assert entries[0].device_config_a == "device_config_rdma_1x400G/1.0.0" - - # Check second entry - assert entries[1].nic_id == "42424650-4c32-3533-3731-304535000000" - assert entries[1].pcie_bdf == "0000:16:00.0" - assert entries[1].cpld == "3.16 (primary)" - assert entries[1].boot0 == "21" - assert entries[1].uboot_a == "1.117.1-a-63" - assert entries[1].firmware_a == "1.117.1-a-63" - assert entries[1].device_config_a == "device_config_rdma_1x400G/1.0.0" - - -def test_parse_nicctl_show_version_firmware_empty_output(collector): - """Test parsing empty firmware version output.""" - entries = collector._parse_nicctl_version_firmware("") - assert len(entries) == 0 - - -def 
test_network_data_model_with_pensando_nic_rdma_statistics(): - """Test NetworkDataModel with Pensando NIC RDMA statistics.""" - from nodescraper.plugins.inband.network.networkdata import ( - NetworkDataModel, - PensandoNicRdmaStatistic, - PensandoNicRdmaStatistics, - ) - - data = NetworkDataModel( - pensando_nic_rdma_statistics=[ - PensandoNicRdmaStatistics( - nic_id="42424650-4c32-3533-3330-323934000000", - pcie_bdf="0000:06:00.0", - statistics=[ - PensandoNicRdmaStatistic(name="Queue pair create", count=1), - PensandoNicRdmaStatistic(name="Completion queue create", count=2), - ], - ) - ] - ) - - assert len(data.pensando_nic_rdma_statistics) == 1 - assert data.pensando_nic_rdma_statistics[0].nic_id == "42424650-4c32-3533-3330-323934000000" - assert len(data.pensando_nic_rdma_statistics[0].statistics) == 2 - - -def test_network_data_model_with_pensando_nic_version_host_software(): - """Test NetworkDataModel with Pensando NIC host software version.""" - from nodescraper.plugins.inband.network.networkdata import ( - NetworkDataModel, - PensandoNicVersionHostSoftware, - ) - - data = NetworkDataModel( - pensando_nic_version_host_software=PensandoNicVersionHostSoftware( - nicctl="1.117.1-a-63", - ipc_driver="1.117.1.a.63", - ionic_driver="25.08.4.004", - ) - ) - - assert data.pensando_nic_version_host_software is not None - assert data.pensando_nic_version_host_software.nicctl == "1.117.1-a-63" - assert data.pensando_nic_version_host_software.ipc_driver == "1.117.1.a.63" - assert data.pensando_nic_version_host_software.ionic_driver == "25.08.4.004" - - -def test_network_data_model_with_pensando_nic_version_firmware(): - """Test NetworkDataModel with Pensando NIC firmware versions.""" - from nodescraper.plugins.inband.network.networkdata import ( - NetworkDataModel, - PensandoNicVersionFirmware, - ) - - data = NetworkDataModel( - pensando_nic_version_firmware=[ - PensandoNicVersionFirmware( - nic_id="42424650-4c32-3533-3330-323934000000", - pcie_bdf="0000:06:00.0", - 
cpld="3.16 (primary)", - boot0="21", - uboot_a="1.117.1-a-63", - firmware_a="1.117.1-a-63", - device_config_a="device_config_rdma_1x400G/1.0.0", - ) - ] - ) - - assert len(data.pensando_nic_version_firmware) == 1 - assert data.pensando_nic_version_firmware[0].nic_id == "42424650-4c32-3533-3330-323934000000" - assert data.pensando_nic_version_firmware[0].cpld == "3.16 (primary)" - - def test_network_accessibility_linux_success(collector, conn_mock): """Test network accessibility check on Linux with successful ping""" collector.system_info.os_family = OSFamily.LINUX diff --git a/test/unit/plugin/test_niccli_collector.py b/test/unit/plugin/test_niccli_collector.py new file mode 100644 index 00000000..c4e5adef --- /dev/null +++ b/test/unit/plugin/test_niccli_collector.py @@ -0,0 +1,270 @@ +############################################################################### +# +# MIT License +# +# Copyright (c) 2025 Advanced Micro Devices, Inc. +# +############################################################################### +from unittest.mock import MagicMock + +import pytest + +from nodescraper.enums.executionstatus import ExecutionStatus +from nodescraper.enums.systeminteraction import SystemInteractionLevel +from nodescraper.models.systeminfo import OSFamily +from nodescraper.plugins.inband.nic.nic_collector import NicCollector +from nodescraper.plugins.inband.nic.nic_data import ( + NicCliDevice, + NicCliQos, + NicDataModel, + PensandoNicCard, +) + + +@pytest.fixture +def collector(system_info, conn_mock): + return NicCollector( + system_info=system_info, + system_interaction_level=SystemInteractionLevel.PASSIVE, + connection=conn_mock, + ) + + +NICCLI_LISTDEV_OUTPUT = """1) Broadcom BCM57608 1x400G QSFP-DD PCIe Ethernet NIC (Adp#1 Port#1) + Device Interface : abcd1p1 + MAC Address : 81:82:83:84:85:88 + PCI Address : 0000:22:00.0 +""" + +NICCLI_QOS_OUTPUT = """IEEE 8021QAZ ETS Configuration TLV: + PRIO_MAP: 0:0 1:0 2:0 3:1 4:0 5:0 6:0 7:2 + TC Bandwidth: 50% 50% 0% + 
TSA_MAP: 0:ets 1:ets 2:strict +IEEE 8021QAZ PFC TLV: + PFC enabled: 3 +IEEE 8021QAZ APP TLV: + APP#0: + Priority: 7 + Sel: 5 + DSCP: 48 + + APP#1: + Priority: 3 + Sel: 5 + DSCP: 26 + + APP#2: + Priority: 3 + Sel: 3 + UDP or DCCP: 4791 + +TC Rate Limit: 100% 100% 100% 0% 0% 0% 0% 0% +""" + + +def test_parse_niccli_listdev_device(collector): + """Test parsing Broadcom NIC device from niccli --list_devices output.""" + devices = collector._parse_niccli_listdev(NICCLI_LISTDEV_OUTPUT) + + assert len(devices) == 1 + device1 = devices[0] + assert device1.device_num == 1 + assert device1.model == "Broadcom BCM57608 1x400G QSFP-DD PCIe Ethernet NIC" + assert device1.adapter_port == "Adp#1 Port#1" + assert device1.interface_name == "abcd1p1" + assert device1.mac_address == "81:82:83:84:85:88" + assert device1.pci_address == "0000:22:00.0" + + +def test_parse_niccli_listdev_empty_output(collector): + """Test parsing empty niccli --list_devices output.""" + devices = collector._parse_niccli_listdev("") + assert len(devices) == 0 + + +def test_parse_niccli_listdev_malformed_output(collector): + """Test parsing malformed niccli --list_devices output gracefully.""" + malformed = """some random text +not a valid device line +123 invalid format +""" + devices = collector._parse_niccli_listdev(malformed) + assert isinstance(devices, list) + + +def test_parse_niccli_qos_complete(collector): + """Test parsing complete Broadcom NIC QoS output with all fields.""" + qos = collector._parse_niccli_qos(1, NICCLI_QOS_OUTPUT) + + assert qos.device_num == 1 + assert qos.raw_output == NICCLI_QOS_OUTPUT + assert len(qos.prio_map) == 8 + assert qos.prio_map[0] == 0 + assert qos.prio_map[3] == 1 + assert qos.prio_map[7] == 2 + assert len(qos.tc_bandwidth) == 3 + assert qos.tc_bandwidth[0] == 50 + assert qos.tc_bandwidth[1] == 50 + assert qos.tc_bandwidth[2] == 0 + assert len(qos.tsa_map) == 3 + assert qos.tsa_map[0] == "ets" + assert qos.tsa_map[2] == "strict" + assert qos.pfc_enabled == 3 + 
assert len(qos.app_entries) == 3 + assert qos.app_entries[0].priority == 7 + assert qos.app_entries[0].sel == 5 + assert qos.app_entries[0].dscp == 48 + assert qos.app_entries[2].protocol == "UDP or DCCP" + assert qos.app_entries[2].port == 4791 + assert len(qos.tc_rate_limit) == 8 + assert qos.tc_rate_limit[0] == 100 + + +def test_parse_niccli_qos_empty_output(collector): + """Test parsing empty QoS output.""" + qos = collector._parse_niccli_qos(1, "") + assert qos.device_num == 1 + assert qos.raw_output == "" + assert len(qos.prio_map) == 0 + assert len(qos.tc_bandwidth) == 0 + assert len(qos.tsa_map) == 0 + assert qos.pfc_enabled is None + assert len(qos.app_entries) == 0 + assert len(qos.tc_rate_limit) == 0 + + +def test_parse_niccli_qos_multiple_app_protocols(collector): + """Test parsing QoS with APP entries having different protocols.""" + qos_multi_protocol = """IEEE 8021QAZ ETS Configuration TLV: + PRIO_MAP: 0:0 + TC Bandwidth: 100% + TSA_MAP: 0:ets +IEEE 8021QAZ PFC TLV: + PFC enabled: 0 +IEEE 8021QAZ APP TLV: + APP#0: + Priority: 5 + Sel: 3 + TCP: 8080 + + APP#1: + Priority: 6 + Sel: 3 + UDP: 9000 + +TC Rate Limit: 100% +""" + qos = collector._parse_niccli_qos(3, qos_multi_protocol) + assert len(qos.app_entries) == 2 + assert qos.app_entries[0].priority == 5 + assert qos.app_entries[0].sel == 3 + assert qos.app_entries[0].protocol == "TCP" + assert qos.app_entries[0].port == 8080 + assert qos.app_entries[1].priority == 6 + assert qos.app_entries[1].protocol == "UDP" + assert qos.app_entries[1].port == 9000 + + +def test_parse_niccli_qos_malformed_values(collector): + """Test parsing QoS output with malformed values gracefully.""" + malformed = """IEEE 8021QAZ ETS Configuration TLV: + PRIO_MAP: 0:invalid 1:1 bad:data + TC Bandwidth: 50% invalid 50% + TSA_MAP: 0:ets bad:value 1:strict +IEEE 8021QAZ PFC TLV: + PFC enabled: not_a_number +TC Rate Limit: 100% bad% 100% +""" + qos = collector._parse_niccli_qos(1, malformed) + assert qos.device_num == 1 + assert 
1 in qos.prio_map + assert qos.prio_map[1] == 1 + assert 50 in qos.tc_bandwidth + assert qos.tsa_map.get(0) == "ets" + assert qos.tsa_map.get(1) == "strict" + assert qos.pfc_enabled is None + + +def test_nic_data_model_with_broadcom_nic(collector): + """Test creating NicDataModel with Broadcom NIC data.""" + device = NicCliDevice( + device_num=1, + model="Broadcom BCM57608 1x400G QSFP-DD PCIe Ethernet NIC", + adapter_port="Adp#1 Port#1", + interface_name="benic1p1", + mac_address="8C:84:74:37:C3:70", + pci_address="0000:06:00.0", + ) + qos = NicCliQos( + device_num=1, + raw_output="test output", + prio_map={0: 0, 1: 1}, + tc_bandwidth=[50, 50], + tsa_map={0: "ets", 1: "strict"}, + pfc_enabled=3, + tc_rate_limit=[100, 100], + ) + data = NicDataModel( + broadcom_nic_devices=[device], + broadcom_nic_qos={1: qos}, + ) + assert len(data.broadcom_nic_devices) == 1 + assert len(data.broadcom_nic_qos) == 1 + assert data.broadcom_nic_devices[0].device_num == 1 + assert data.broadcom_nic_devices[0].interface_name == "benic1p1" + assert data.broadcom_nic_qos[1].device_num == 1 + assert data.broadcom_nic_qos[1].pfc_enabled == 3 + + +def test_nic_data_model_with_pensando_nic(collector): + """Test creating NicDataModel with Pensando NIC data.""" + card1 = PensandoNicCard( + id="42424650-4c32-3533-3330-323934000000", + pcie_bdf="0000:06:00.0", + asic="salina", + fw_partition="A", + serial_number="FPL25330294", + ) + card2 = PensandoNicCard( + id="42424650-4c32-3533-3731-304535000000", + pcie_bdf="0000:16:00.0", + asic="salina", + fw_partition="A", + serial_number="FPL253710E5", + ) + data = NicDataModel( + pensando_nic_cards=[card1, card2], + ) + assert len(data.pensando_nic_cards) == 2 + assert data.pensando_nic_cards[0].id == "42424650-4c32-3533-3330-323934000000" + assert data.pensando_nic_cards[0].pcie_bdf == "0000:06:00.0" + assert data.pensando_nic_cards[0].asic == "salina" + assert data.pensando_nic_cards[1].serial_number == "FPL253710E5" + + +def 
test_collect_data_success(collector, conn_mock): + """Test successful collection of niccli/nicctl data.""" + collector.system_info.os_family = OSFamily.LINUX + + def run_sut_cmd_side_effect(cmd, **kwargs): + if "niccli" in cmd and ("--list" in cmd or "--list_devices" in cmd): + return MagicMock(exit_code=0, stdout=NICCLI_LISTDEV_OUTPUT, stderr="", command=cmd) + if cmd.strip() == "nicctl show card": + return MagicMock( + exit_code=0, + stdout="1111111-4c32-3533-3330-12345000000 0000:06:00.0\n", + stderr="", + command=cmd, + ) + if "nicctl" in cmd or "niccli" in cmd: + return MagicMock(exit_code=0, stdout="", stderr="", command=cmd) + return MagicMock(exit_code=1, stdout="", stderr="", command=cmd) + + collector._run_sut_cmd = MagicMock(side_effect=run_sut_cmd_side_effect) + + result, data = collector.collect_data() + + assert result.status == ExecutionStatus.OK + assert data is not None + assert isinstance(data, NicDataModel) + assert len(data.results) >= 1