Manage blade expansions as independent devices

This patch adds the ability to detect and manage GPU and disk expansion
bays, and to add their internal components either to the device
corresponding to the blade server or to a dedicated device.

It builds on the work done by @cyrinux on GPU bay management and
applies the same principle to external disk bays, while harmonizing the
inventory management:

- If the `--expansion-as-device` option is not specified on the command
  line, the GPU cards, RAID controllers and their attached disks are added
  to the blade device, and any existing device corresponding to the
  expansion bay is deleted.
- If the `--expansion-as-device` option is specified on the command
  line, a dedicated device corresponding to the expansion bay is
  created, and the GPUs, RAID cards and their attached disks are removed
  from the blade device and added to the expansion device.
This commit is contained in:
Christophe Simon 2022-02-11 18:22:13 +01:00
parent 8a46af19b8
commit 2f09cf8d42
6 changed files with 195 additions and 175 deletions

View file

@ -28,6 +28,8 @@ def get_config():
p.add_argument('--update-inventory', action='store_true', help='Update inventory') p.add_argument('--update-inventory', action='store_true', help='Update inventory')
p.add_argument('--update-location', action='store_true', help='Update location') p.add_argument('--update-location', action='store_true', help='Update location')
p.add_argument('--update-psu', action='store_true', help='Update PSU') p.add_argument('--update-psu', action='store_true', help='Update PSU')
p.add_argument('--expansion-as-device', action='store_true',
help='Manage blade expansions as external devices')
p.add_argument('--log_level', default='debug') p.add_argument('--log_level', default='debug')
p.add_argument('--netbox.url', help='Netbox URL') p.add_argument('--netbox.url', help='Netbox URL')

View file

@ -46,10 +46,11 @@ class Inventory():
- no scan of NVMe devices - no scan of NVMe devices
""" """
def __init__(self, server): def __init__(self, server, update_expansion=False):
self.create_netbox_tags() self.create_netbox_tags()
self.server = server self.server = server
netbox_server = self.server.get_netbox_server() self.update_expansion = update_expansion
netbox_server = self.server.get_netbox_server(update_expansion)
self.device_id = netbox_server.id if netbox_server else None self.device_id = netbox_server.id if netbox_server else None
self.raid = None self.raid = None
@ -220,7 +221,7 @@ class Inventory():
self.create_netbox_cpus() self.create_netbox_cpus()
def get_raid_cards(self): def get_raid_cards(self, filter_cards=False):
raid_class = None raid_class = None
if self.server.manufacturer == 'Dell': if self.server.manufacturer == 'Dell':
if is_tool('omreport'): if is_tool('omreport'):
@ -235,9 +236,15 @@ class Inventory():
return [] return []
self.raid = raid_class() self.raid = raid_class()
controllers = self.raid.get_controllers()
if len(self.raid.get_controllers()): if filter_cards and config.expansion_as_device \
return controllers and self.server.own_expansion_slot():
return [
c for c in self.raid.get_controllers()
if c.is_external() is self.update_expansion
]
else:
return self.raid.get_controllers()
def create_netbox_raid_card(self, raid_card): def create_netbox_raid_card(self, raid_card):
manufacturer = self.find_or_create_manufacturer( manufacturer = self.find_or_create_manufacturer(
@ -276,7 +283,7 @@ class Inventory():
device_id=self.device_id, device_id=self.device_id,
tag=[INVENTORY_TAG['raid_card']['slug']] tag=[INVENTORY_TAG['raid_card']['slug']]
) )
raid_cards = self.get_raid_cards() raid_cards = self.get_raid_cards(filter_cards=True)
# delete cards that are in netbox but not locally # delete cards that are in netbox but not locally
# use the serial_number has the comparison element # use the serial_number has the comparison element
@ -336,7 +343,7 @@ class Inventory():
d['Vendor'] = get_vendor(disk['product']) d['Vendor'] = get_vendor(disk['product'])
disks.append(d) disks.append(d)
for raid_card in self.get_raid_cards(): for raid_card in self.get_raid_cards(filter_cards=True):
disks += raid_card.get_physical_disks() disks += raid_card.get_physical_disks()
# remove duplicate serials # remove duplicate serials
@ -463,21 +470,24 @@ class Inventory():
tag=INVENTORY_TAG['gpu']['slug'], tag=INVENTORY_TAG['gpu']['slug'],
) )
if not len(nb_gpus) or \ if config.expansion_as_device and len(nb_gpus):
for x in nb_gpus:
x.delete()
elif not len(nb_gpus) or \
len(nb_gpus) and len(gpus) != len(nb_gpus): len(nb_gpus) and len(gpus) != len(nb_gpus):
for x in nb_gpus: for x in nb_gpus:
x.delete() x.delete()
self.create_netbox_gpus() self.create_netbox_gpus()
def create_or_update(self): def create_or_update(self):
if config.inventory is None or config.update_inventory is None: if config.inventory is None or config.update_inventory is None:
return False return False
self.do_netbox_cpus() if self.update_expansion is False:
self.do_netbox_memories() self.do_netbox_cpus()
self.do_netbox_raid_cards() self.do_netbox_memories()
self.do_netbox_disks() self.do_netbox_interfaces()
self.do_netbox_interfaces() self.do_netbox_motherboard()
self.do_netbox_motherboard()
self.do_netbox_gpus() self.do_netbox_gpus()
self.do_netbox_disks()
self.do_netbox_raid_cards()
return True return True

View file

@ -15,6 +15,9 @@ class RaidController():
def get_physical_disks(self): def get_physical_disks(self):
raise NotImplementedError raise NotImplementedError
def is_external(self):
return False
class Raid(): class Raid():
def get_controllers(self): def get_controllers(self):

View file

@ -1,106 +1,65 @@
import re import re
import subprocess import subprocess
from netbox_agent.config import config
from netbox_agent.misc import get_vendor from netbox_agent.misc import get_vendor
from netbox_agent.raid.base import Raid, RaidController from netbox_agent.raid.base import Raid, RaidController
REGEXP_CONTROLLER_HP = re.compile(r'Smart Array ([a-zA-Z0-9- ]+) in Slot ([0-9]+)') REGEXP_CONTROLLER_HP = re.compile(r'Smart Array ([a-zA-Z0-9- ]+) in Slot ([0-9]+)')
def _get_indentation(string): def _parse_ctrl_output(lines):
"""Return the number of spaces before the current line.""" controllers = {}
return len(string) - len(string.lstrip(' ')) current_ctrl = None
for line in lines:
def _get_key_value(string): if not line or line.startswith('Note:'):
"""Return the (key, value) as a tuple from a string."""
# Normally all properties look like this:
# Unique Identifier: 600508B1001CE4ACF473EE9C826230FF
# Disk Name: /dev/sda
# Mount Points: None
key = ''
value = ''
try:
key, value = string.split(':')
except ValueError:
# This handles the case when the property of a logical drive
# returned is as follows. Here we cannot split by ':' because
# the disk id has colon in it. So if this is about disk,
# then strip it accordingly.
# Mirror Group 0: physicaldrive 6I:1:5
string = string.lstrip(' ')
if string.startswith('physicaldrive'):
fields = string.split(' ')
key = fields[0]
value = fields[1]
else:
# TODO(rameshg87): Check if this ever occurs.
return None, None
return key.lstrip(' ').rstrip(' '), value.lstrip(' ').rstrip(' ')
def _get_dict(lines, start_index, indentation):
"""Recursive function for parsing hpssacli/ssacli output."""
info = {}
current_item = None
i = start_index
while i < len(lines):
current_line = lines[i]
if current_line.startswith('Note:'):
i = i + 1
continue continue
ctrl = REGEXP_CONTROLLER_HP.search(line)
current_line_indentation = _get_indentation(current_line) if ctrl is not None:
# This check ignore some useless information that make current_ctrl = ctrl.group(1)
# crash the parsing controllers[current_ctrl] = {"Slot": ctrl.group(2)}
product_name = REGEXP_CONTROLLER_HP.search(current_line) if "Embedded" not in line:
if current_line_indentation == 0 and not product_name: controllers[current_ctrl]["External"] = True
i = i + 1
continue continue
attr, val = line.split(": ", 1)
attr = attr.strip()
val = val.strip()
controllers[current_ctrl][attr] = val
return controllers
if current_line_indentation == indentation:
current_item = current_line.lstrip(' ')
info[current_item] = {} def _parse_pd_output(lines):
i = i + 1 drives = {}
current_array = None
current_drv = None
for line in lines:
line = line.strip()
if not line or line.startswith('Note:'):
continue continue
# Parses the Array the drives are in
if i >= len(lines) - 1: if line.startswith("Array"):
key, value = _get_key_value(current_line) current_array = line.split(None, 1)[1]
# If this is some unparsable information, then # Detects new physical drive
# just skip it. if line.startswith("physicaldrive"):
if key: current_drv = line.split(None, 1)[1]
info[current_item][key] = value drives[current_drv] = {}
return info, i if current_array is not None:
drives[current_drv]["Array"] = current_array
next_line = lines[i + 1] continue
next_line_indentation = _get_indentation(next_line) if ": " not in line:
continue
if current_line_indentation == next_line_indentation: attr, val = line.split(": ", 1)
key, value = _get_key_value(current_line) drives.setdefault(current_drv, {})[attr] = val
if key: return drives
info[current_item][key] = value
i = i + 1
elif next_line_indentation > current_line_indentation:
ret_dict, j = _get_dict(lines, i, current_line_indentation)
info[current_item].update(ret_dict)
i = j + 1
elif next_line_indentation < current_line_indentation:
key, value = _get_key_value(current_line)
if key:
info[current_item][key] = value
return info, i
return info, i
class HPRaidController(RaidController): class HPRaidController(RaidController):
def __init__(self, controller_name, data): def __init__(self, controller_name, data):
self.controller_name = controller_name self.controller_name = controller_name
self.data = data self.data = data
self.drives = self._get_physical_disks()
def get_product_name(self): def get_product_name(self):
return self.controller_name return self.controller_name
@ -114,40 +73,42 @@ class HPRaidController(RaidController):
def get_firmware_version(self): def get_firmware_version(self):
return self.data['Firmware Version'] return self.data['Firmware Version']
def get_physical_disks(self): def is_external(self):
ret = [] return self.data.get('External', False)
def _get_physical_disks(self):
output = subprocess.getoutput( output = subprocess.getoutput(
'ssacli ctrl slot={slot} pd all show detail'.format(slot=self.data['Slot']) 'ssacli ctrl slot={slot} pd all show detail'.format(slot=self.data['Slot'])
) )
lines = output.split('\n') lines = output.split('\n')
lines = list(filter(None, lines)) lines = list(filter(None, lines))
j = -1 drives = _parse_pd_output(lines)
while j < len(lines): ret = []
info_dict, j = _get_dict(lines, j + 1, 0)
key = next(iter(info_dict)) for name, attrs in drives.items():
for array, physical_disk in info_dict[key].items(): model = attrs.get('Model', '').strip()
for _, pd_attr in physical_disk.items(): vendor = None
model = pd_attr.get('Model', '').strip() if model.startswith('HP'):
vendor = None vendor = 'HP'
if model.startswith('HP'): elif len(model.split()) > 1:
vendor = 'HP' vendor = get_vendor(model.split()[1])
elif len(model.split()) > 1: else:
vendor = get_vendor(model.split()[1]) vendor = get_vendor(model)
else:
vendor = get_vendor(model)
ret.append({ ret.append({
'Model': model, 'Model': model,
'Vendor': vendor, 'Vendor': vendor,
'SN': pd_attr.get('Serial Number', '').strip(), 'SN': attrs.get('Serial Number', '').strip(),
'Size': pd_attr.get('Size', '').strip(), 'Size': attrs.get('Size', '').strip(),
'Type': 'SSD' if pd_attr.get('Interface Type') == 'Solid State SATA' 'Type': 'SSD' if attrs.get('Interface Type') == 'Solid State SATA'
else 'HDD', else 'HDD',
'_src': self.__class__.__name__, '_src': self.__class__.__name__,
}) })
return ret return ret
def get_physical_disks(self):
return self.drives
class HPRaid(Raid): class HPRaid(Raid):
def __init__(self): def __init__(self):
@ -158,16 +119,11 @@ class HPRaid(Raid):
def convert_to_dict(self): def convert_to_dict(self):
lines = self.output.split('\n') lines = self.output.split('\n')
lines = list(filter(None, lines)) lines = list(filter(None, lines))
j = -1 controllers = _parse_ctrl_output(lines)
while j < len(lines): for controller, attrs in controllers.items():
info_dict, j = _get_dict(lines, j + 1, 0) self.controllers.append(
if len(info_dict.keys()): HPRaidController(controller, attrs)
_product_name = list(info_dict.keys())[0] )
product_name = REGEXP_CONTROLLER_HP.search(_product_name)
if product_name:
self.controllers.append(
HPRaidController(product_name.group(1), info_dict[_product_name])
)
def get_controllers(self): def get_controllers(self):
return self.controllers return self.controllers

View file

@ -25,6 +25,7 @@ class ServerBase():
self.bios = dmidecode.get_by_type(self.dmi, 'BIOS') self.bios = dmidecode.get_by_type(self.dmi, 'BIOS')
self.chassis = dmidecode.get_by_type(self.dmi, 'Chassis') self.chassis = dmidecode.get_by_type(self.dmi, 'Chassis')
self.system = dmidecode.get_by_type(self.dmi, 'System') self.system = dmidecode.get_by_type(self.dmi, 'System')
self.inventory = Inventory(server=self)
self.network = None self.network = None
@ -94,6 +95,19 @@ class ServerBase():
server.position = None server.position = None
return update, server return update, server
def update_netbox_expansion_location(self, server, expansion):
update = False
if expansion.tenant != server.tenant:
expansion.tenant = server.tenant
update = True
if expansion.site != server.site:
expansion.site = server.site
update = True
if expansion.rack != server.rack:
expansion.rack = server.rack
update = True
return update
def get_rack(self): def get_rack(self):
rack = Rack() rack = Rack()
return rack.get() return rack.get()
@ -249,8 +263,11 @@ class ServerBase():
) )
return new_server return new_server
def get_netbox_server(self): def get_netbox_server(self, expansion=False):
return nb.dcim.devices.get(serial=self.get_service_tag()) if expansion is False:
return nb.dcim.devices.get(serial=self.get_service_tag())
else:
return nb.dcim.devices.get(serial=self.get_expansion_service_tag())
def _netbox_set_or_update_blade_slot(self, server, chassis, datacenter): def _netbox_set_or_update_blade_slot(self, server, chassis, datacenter):
# before everything check if right chassis # before everything check if right chassis
@ -285,9 +302,9 @@ class ServerBase():
slot=slot slot=slot
)) ))
def _netbox_set_or_update_blade_expansion_slot(self, server, chassis, datacenter): def _netbox_set_or_update_blade_expansion_slot(self, expansion, chassis, datacenter):
# before everything check if right chassis # before everything check if right chassis
actual_device_bay = server.parent_device.device_bay if server.parent_device else None actual_device_bay = expansion.parent_device.device_bay if expansion.parent_device else None
actual_chassis = actual_device_bay.device if actual_device_bay else None actual_chassis = actual_device_bay.device if actual_device_bay else None
slot = self.get_blade_expansion_slot() slot = self.get_blade_expansion_slot()
if actual_chassis and \ if actual_chassis and \
@ -295,30 +312,28 @@ class ServerBase():
actual_device_bay.name == slot: actual_device_bay.name == slot:
return return
server.name += " expansion"
real_device_bays = nb.dcim.device_bays.filter( real_device_bays = nb.dcim.device_bays.filter(
device_id=chassis.id, device_id=chassis.id,
name=slot, name=slot,
) )
if len(real_device_bays) > 0: if len(real_device_bays) == 0:
logging.info(
'Setting device expansion ({serial}) new slot on {slot} '
'(Chassis {chassis_serial})..'.format(
serial=server.serial, slot=slot, chassis_serial=chassis.serial
))
# reset actual device bay if set
if actual_device_bay:
actual_device_bay.installed_device = None
actual_device_bay.save()
# setup new device bay
real_device_bay = real_device_bays[0]
real_device_bay.installed_device = server
real_device_bay.save()
else:
logging.error('Could not find slot {slot} expansion for chassis'.format( logging.error('Could not find slot {slot} expansion for chassis'.format(
slot=slot slot=slot
)) ))
return
logging.info(
'Setting device expansion ({serial}) new slot on {slot} '
'(Chassis {chassis_serial})..'.format(
serial=expansion.serial, slot=slot, chassis_serial=chassis.serial
))
# reset actual device bay if set
if actual_device_bay:
actual_device_bay.installed_device = None
actual_device_bay.save()
# setup new device bay
real_device_bay = real_device_bays[0]
real_device_bay.installed_device = expansion
real_device_bay.save()
def netbox_create_or_update(self, config): def netbox_create_or_update(self, config):
""" """
@ -360,9 +375,10 @@ class ServerBase():
if config.register or config.update_all or config.update_network: if config.register or config.update_all or config.update_network:
self.network = ServerNetwork(server=self) self.network = ServerNetwork(server=self)
self.network.create_or_update_netbox_network_cards() self.network.create_or_update_netbox_network_cards()
update_inventory = config.inventory and (config.register or
config.update_all or config.update_inventory)
# update inventory if feature is enabled # update inventory if feature is enabled
if config.inventory and (config.register or config.update_all or config.update_inventory): if update_inventory:
self.inventory = Inventory(server=self)
self.inventory.create_or_update() self.inventory.create_or_update()
# update psu # update psu
if config.register or config.update_all or config.update_psu: if config.register or config.update_all or config.update_psu:
@ -370,14 +386,21 @@ class ServerBase():
self.power.create_or_update_power_supply() self.power.create_or_update_power_supply()
self.power.report_power_consumption() self.power.report_power_consumption()
if self.own_expansion_slot(): expansion = nb.dcim.devices.get(serial=self.get_expansion_service_tag())
if self.own_expansion_slot() and config.expansion_as_device:
logging.debug('Update Server expansion...') logging.debug('Update Server expansion...')
expansion = nb.dcim.devices.get(serial=self.get_expansion_service_tag())
if not expansion: if not expansion:
expansion = self._netbox_create_blade_expansion(chassis, datacenter, tenant, rack) expansion = self._netbox_create_blade_expansion(chassis, datacenter, tenant, rack)
# set slot for blade expansion # set slot for blade expansion
self._netbox_set_or_update_blade_expansion_slot(expansion, chassis, datacenter) self._netbox_set_or_update_blade_expansion_slot(expansion, chassis, datacenter)
if update_inventory:
# Updates expansion inventory
inventory = Inventory(server=self, update_expansion=True)
inventory.create_or_update()
elif self.own_expansion_slot() and expansion:
expansion.delete()
expansion = None
update = 0 update = 0
# for every other specs # for every other specs
@ -386,6 +409,7 @@ class ServerBase():
update += 1 update += 1
server.name = self.get_hostname() server.name = self.get_hostname()
if sorted(set(server.tags)) != sorted(set(self.tags)): if sorted(set(server.tags)) != sorted(set(self.tags)):
server.tags = self.tags server.tags = self.tags
update += 1 update += 1
@ -396,6 +420,17 @@ class ServerBase():
if update: if update:
server.save() server.save()
if expansion:
update = 0
expansion_name = server.name + ' expansion'
if expansion.name != expansion_name:
expansion.name = expansion_name
update += 1
if self.update_netbox_expansion_location(server, expansion):
update += 1
if update:
expansion.save()
logging.debug('Finished updating Server!') logging.debug('Finished updating Server!')
def print_debug(self): def print_debug(self):

View file

@ -67,35 +67,49 @@ class HPHost(ServerBase):
return self.hp_rack_locator["Enclosure Serial"].strip() return self.hp_rack_locator["Enclosure Serial"].strip()
return self.get_service_tag() return self.get_service_tag()
def get_blade_expansion_slot(self):
"""
Expansion slot are always the compute bay number + 1
"""
if self.is_blade() and self.own_gpu_expansion_slot() or \
self.own_disk_expansion_slot() or True:
return 'Bay {}'.format(
str(int(self.hp_rack_locator['Server Bay'].strip()) + 1)
)
return None
def get_expansion_product(self): def get_expansion_product(self):
""" """
Get the extension slot that is on a pair slot number Get the extension slot that is on a pair slot number
next to the compute slot that is on an odd slot number next to the compute slot that is on an odd slot number
I only know on model of slot GPU extension card that. I only know on model of slot GPU extension card that.
""" """
if self.own_expansion_slot(): if self.own_gpu_expansion_slot():
return "ProLiant BL460c Graphics Expansion Blade" return "ProLiant BL460c Graphics Expansion Blade"
return None elif self.own_disk_expansion_slot():
return "ProLiant BL460c Disk Expansion Blade"
def is_expansion_slot(self, server):
"""
Return True if its an extension slot, based on the name
"""
return server.name.endswith(" expansion")
def get_blade_expansion_slot(self):
"""
Expansion slot are always the compute bay number + 1
"""
if self.is_blade() and self.own_expansion_slot():
return 'Bay {}'.format(
str(int(self.hp_rack_locator['Server Bay'].strip()) + 1)
)
return None return None
def own_expansion_slot(self): def own_expansion_slot(self):
"""
Say if the device can host an extension card based
on the product name
"""
return self.own_gpu_expansion_slot() or self.own_disk_expansion_slot()
def own_gpu_expansion_slot(self):
""" """
Say if the device can host an extension card based Say if the device can host an extension card based
on the product name on the product name
""" """
return self.get_product_name().endswith('Graphics Exp') return self.get_product_name().endswith('Graphics Exp')
def own_disk_expansion_slot(self):
"""
Say if the device can host an extension card based
on the product name
"""
for raid_card in self.inventory.get_raid_cards():
if self.is_blade() and raid_card.is_external():
return True
return False