aetherscale

[unmaintained] code for a cloud provider tutorial
Log | Files | Refs | README | LICENSE

commit f93c23a9682faa84c86110373022eac91901af9d
parent 0c8e58874594e9055d487e90ffa8cb6fbb8737cb
Author: Stefan Koch <programming@stefan-koch.name>
Date:   Sun, 20 Dec 2020 16:50:37 +0100

Merge branch 'feat-vpn'

Diffstat:
MREADME.md | 77+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Maetherscale/client.py | 11++++++++++-
Maetherscale/computing.py | 186+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------
Maetherscale/config.py | 11+++++++++++
Daetherscale/interfaces.py | 74--------------------------------------------------------------------------
Aaetherscale/networking.py | 134+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Maetherscale/qemu/runtime.py | 23++++++++++++++++++++++-
Maetherscale/services.py | 46+++++++++++++++++++++++++++++++++++++++++++++-
Aaetherscale/vpn/radvd.py | 72++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Aaetherscale/vpn/tinc.py | 133+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Abin/setup-vpn-tap-vde.sh | 47+++++++++++++++++++++++++++++++++++++++++++++++
Mtests/conftest.py | 11++++++++++-
Mtests/test_computing.py | 7+++++--
Dtests/test_interfaces.py | 8--------
Atests/test_networking.py | 37+++++++++++++++++++++++++++++++++++++
Atests/test_radvd.py | 47+++++++++++++++++++++++++++++++++++++++++++++++
Mtests/test_services.py | 1+
17 files changed, 795 insertions(+), 130 deletions(-)

diff --git a/README.md b/README.md @@ -21,6 +21,16 @@ virtualenv venv && source venv/bin/activate pip install -e . ``` +### Operating System Changes + +I am trying to design aetherscale to run without root permissions as +far as possible. +For some actions more permissions than a standard user usually has are +needed, though. This section will guide you through all of the changes +required to allow aetherscale itself to run as a standard user. + +#### Bridge Networking + Before you can start using the server you need to setup a TAP device to which VDE networking can connect. This is needed so that the started VMs can join the network. To be able to create a TAP device that is connected to your @@ -36,6 +46,73 @@ bin/setup-tap-vde.sh -u USER -i IP_ADDRESS -g GATEWAY -e PHYSICAL_DEVICE bin/setup-tap-vde.sh -u username -i 192.168.0.10/24 -g 192.168.0.1 -e eth0 ``` +#### VPN Networking + +It's possible to connect different VMs on possibly different hosts with a VPN. +For each virtual network a tinc instance is started. + +This means that we have to create an unknown number of networks dynamically +upon user request. To work around the problem that a standard user +cannot modify network devices without the `CAP_NET_ADMIN` capability, we +create a pre-defined number of unconfigured network devices `net-vpn-1` to +`net-vpn-k`. +The mapping between the dummy VPN devices and user-chosen VPN names is then +done by aetherscale. + +To create the dummy VPN interfaces (including bridging) run: + +```bash +bin/setup-vpn-tap-vde.sh -u username -n 10 -p 2001:db8:85a3 +``` + +`-p` defines the IPv6 prefix from which the prefixes for private subnets will be +chosen. It must be a `/48` prefix from which each VPN will receive a `/64` +prefix.
+ +This is not a nice solution and ideally network interfaces should be +created on-the-fly, but Linux capabilities inheritance to subprocesses +seems quite complicated, and without inheritance we'd have to grant +`CAP_NET_ADMIN` to both `ip` and `tincd`. This might be undesired, because +then any user can change network devices. Another option could be to +assign `CAP_NET_ADMIN` to the user running aetherscale, but this seems to +[require changes to pam](https://unix.stackexchange.com/questions/454708/how-do-you-add-cap-sys-admin-permissions-to-user-in-centos-7) +and still seems to require inheritable capabilities to be set on each +binary that is to be executed. +While this in my opinion would be a reasonable choice for a production +program, it feels too heavy for a proof-of-concept tool. + +For IPv6 auto-configuration you must have a program for router +advertisement running, e.g. radvd. Since radvd requires `CAP_NET_RAW`, +you'll have to run it externally from aetherscale; aetherscale cannot +autostart it.
+ +An example configuration file for two pre-configured subnets could look like: + +``` +interface aeth-vpnbr-1 { + AdvSendAdvert on; + MinRtrAdvInterval 3; + MaxRtrAdvInterval 10; + prefix ::/64 { + AdvOnLink on; + AdvAutonomous on; + AdvRouterAddr off; + }; +}; + +interface aeth-vpnbr-2 { + AdvSendAdvert on; + MinRtrAdvInterval 3; + MaxRtrAdvInterval 10; + prefix ::/64 { + AdvOnLink on; + AdvAutonomous on; + AdvRouterAddr off; + }; +}; +``` + + ## Usage The server can be started with: diff --git a/aetherscale/client.py b/aetherscale/client.py @@ -68,6 +68,11 @@ def main(): create_vm_parser.add_argument( '--init-script', dest='init_script_path', help='Script to execute at first boot of VM', required=False) + create_vm_parser.add_argument( + '--vpn', help='Name of the VPN to startup/join', required=False) + create_vm_parser.add_argument( + '--no-public-ip', dest='public_ip', action='store_false', default=True, + help='Do not assign a public interface to this VM') start_vm_parser = subparsers.add_parser('start-vm') start_vm_parser.add_argument( '--vm-id', dest='vm_id', help='ID of the VM to start', required=True) @@ -96,9 +101,13 @@ def main(): 'command': 'create-vm', 'options': { 'image': args.image, + 'public-ip': args.public_ip, } } + if args.vpn: + data['options']['vpn'] = args.vpn + if args.init_script_path: with open(args.init_script_path, 'rt') as f: data['options']['init-script'] = f.read() @@ -126,7 +135,7 @@ def main(): try: with ServerCommunication() as c: result = c.send_msg(data, response_expected) - print(result) + print(json.dumps(result)) except pika.exceptions.AMQPConnectionError: print('Could not connect to AMQP broker. Is it running?', file=sys.stderr) diff --git a/aetherscale/computing.py b/aetherscale/computing.py @@ -1,4 +1,3 @@ -from dataclasses import dataclass import logging import json import os @@ -14,11 +13,14 @@ import tempfile import time from typing import List, Optional, Dict, Any, Callable -from . import interfaces +from . 
import networking from .qemu import image, runtime from .qemu.exceptions import QemuException from . import config from . import services +from .vpn.tinc import TincVirtualNetwork, VpnException +from .execution import run_command_chain +import aetherscale.vpn.radvd VDE_FOLDER = '/tmp/vde.ctl' @@ -31,6 +33,8 @@ QUEUE_COMMANDS_MAP = { COMPETING_QUEUE: ['create-vm'], } +RADVD_SERVICE_NAME = 'aetherscale-radvd.service' + logging.basicConfig(level=config.LOG_LEVEL) @@ -48,7 +52,6 @@ def qemu_socket_guest_agent(vm_id: str) -> Path: def create_user_image(vm_id: str, image_name: str) -> Path: base_image = config.BASE_IMAGE_FOLDER / f'{image_name}.qcow2' - print(base_image) if not base_image.is_file(): raise IOError(f'Image "{image_name}" does not exist') @@ -63,18 +66,16 @@ def create_user_image(vm_id: str, image_name: str) -> Path: return user_image -@dataclass -class QemuStartupConfig: - vm_id: str - hda_image: Path - mac_addr: str - vde_folder: Path - - class ComputingHandler: - def __init__(self, service_manager: services.ServiceManager): + def __init__( + self, radvd: aetherscale.vpn.radvd.Radvd, + service_manager: services.ServiceManager): + + self.radvd = radvd self.service_manager = service_manager + self.established_vpns: Dict[str, TincVirtualNetwork] = {} + def list_vms(self, _: Dict[str, Any]) -> List[Dict[str, Any]]: vms = [] @@ -121,14 +122,38 @@ class ComputingHandler: with image.guestmount(user_image) as guest_fs: image.install_startup_script(options['init-script'], guest_fs) - mac_addr = interfaces.create_mac_address() - logging.debug(f'Assigning MAC address "{mac_addr}" to VM "{vm_id}"') + qemu_interfaces = [] + + if 'vpn' in options: + # TODO: Do we have to assign the VPN mac addr to the macvtap? 
+ vpn_tap_device = self._establish_vpn(options['vpn'], vm_id) - qemu_config = QemuStartupConfig( + mac_addr_vpn = networking.create_mac_address() + logging.debug( + f'Assigning MAC address "{mac_addr_vpn}" to ' + f'VM "{vm_id}" for VPN') + + privnet = runtime.QemuInterfaceConfig( + mac_address=mac_addr_vpn, + type=runtime.QemuInterfaceType.TAP, + tap_device=vpn_tap_device) + qemu_interfaces.append(privnet) + + if 'public-ip' in options and options['public-ip']: + mac_addr = networking.create_mac_address() + logging.debug(f'Assigning MAC address "{mac_addr}" to VM "{vm_id}"') + + pubnet = runtime.QemuInterfaceConfig( + mac_address=mac_addr, + type=runtime.QemuInterfaceType.VDE, + vde_folder=Path(VDE_FOLDER)) + qemu_interfaces.append(pubnet) + + qemu_config = runtime.QemuStartupConfig( vm_id=vm_id, hda_image=user_image, - mac_addr=mac_addr, - vde_folder=Path(VDE_FOLDER)) + interfaces=qemu_interfaces) + unit_name = systemd_unit_name_for_vm(vm_id) self._create_qemu_systemd_unit(unit_name, qemu_config) self.service_manager.start_service(unit_name) @@ -204,6 +229,9 @@ class ComputingHandler: return response def delete_vm(self, options: Dict[str, Any]) -> Dict[str, str]: + # TODO: Once all VMs of a VPN on a host have been deleted, we can delete + # the associated VPN + try: vm_id = options['vm-id'] except KeyError: @@ -225,31 +253,46 @@ class ComputingHandler: } def _create_qemu_systemd_unit( - self, unit_name: str, qemu_config: QemuStartupConfig): - hda_quoted = shlex.quote(str(qemu_config.hda_image.absolute())) - device_quoted = shlex.quote( - f'virtio-net-pci,netdev=pubnet,mac={qemu_config.mac_addr}') - netdev_quoted = shlex.quote( - f'vde,id=pubnet,sock={str(qemu_config.vde_folder)}') - name_quoted = shlex.quote( - f'qemu-vm-{qemu_config.vm_id},process=vm-{qemu_config.vm_id}') - + self, unit_name: str, qemu_config: runtime.QemuStartupConfig): + qemu_name = \ + f'qemu-vm-{qemu_config.vm_id},process=vm-{qemu_config.vm_id}' qemu_monitor_path = 
qemu_socket_monitor(qemu_config.vm_id) - socket_quoted = shlex.quote(f'unix:{qemu_monitor_path},server,nowait') - qga_monitor_path = qemu_socket_guest_agent(qemu_config.vm_id) - qga_chardev_quoted = shlex.quote( - f'socket,path={qga_monitor_path},server,nowait,id=qga0') - - command = \ - f'qemu-system-x86_64 -m 4096 -accel kvm -hda {hda_quoted} ' \ - f'-device {device_quoted} -netdev {netdev_quoted} ' \ - f'-name {name_quoted} ' \ - '-nographic ' \ - f'-qmp {socket_quoted} ' \ - f'-chardev {qga_chardev_quoted} ' \ - '-device virtio-serial ' \ - '-device virtserialport,chardev=qga0,name=org.qemu.guest_agent.0' + qga_chardev = f'socket,path={qga_monitor_path},server,nowait,id=qga0' + + command = [ + 'qemu-system-x86_64', + '-nographic', + '-cpu', 'host', + '-m', '4096', + '-accel', 'kvm', + '-hda', str(qemu_config.hda_image.absolute()), + '-name', qemu_name, + '-qmp', f'unix:{qemu_monitor_path},server,nowait', + '-chardev', qga_chardev, + '-device', 'virtio-serial', + '-device', + 'virtserialport,chardev=qga0,name=org.qemu.guest_agent.0', + ] + + for i, interface in enumerate(qemu_config.interfaces): + device = \ + f'virtio-net-pci,netdev=net{i},mac={interface.mac_address}' + + if interface.type == runtime.QemuInterfaceType.VDE: + netdev = f'vde,id=net{i},sock={str(interface.vde_folder)}' + elif interface.type == runtime.QemuInterfaceType.TAP: + netdev = \ + f'tap,id=net{i},ifname={interface.tap_device},' \ + 'script=no,downscript=no' + else: + raise QemuException( + f'Unknown interface type "{interface.type}"') + + command += ['-device', device, '-netdev', netdev] + + command = [shlex.quote(arg) for arg in command] + command = ' '.join(command) with tempfile.NamedTemporaryFile(mode='w+t', delete=False) as f: f.write('[Unit]\n') @@ -264,6 +307,57 @@ class ComputingHandler: self.service_manager.install_service(Path(f.name), unit_name) os.remove(f.name) + def _establish_vpn(self, vpn_name: str, vm_id: str) -> str: + vpn_network_prefix = self.radvd.generate_prefix() + + 
if vpn_name in self.established_vpns: + vpn = self.established_vpns[vpn_name] + else: + logging.info(f'Creating VPN {vpn_name} for VM {vm_id}') + + vpn = TincVirtualNetwork( + vpn_name, config.VPN_CONFIG_FOLDER, self.service_manager) + vpn.create_config(config.HOSTNAME) + vpn.gen_keypair() + + # TODO: Must be re-established after host reboot + # Create an uninitialized tap device so that tincd can run + # without root permissions + # TODO: Assign an more reasonable IP address + # TODO: In real environments the host does not have to be exposed, + # this is only because I want to proxy IP traffic from the host to + # the guest + host_vpn_ip = vpn_network_prefix.replace('/64', '1') + networking.Iproute2Network.tap_device( + vpn.interface_name, aetherscale.config.USER) + networking.Iproute2Network.bridged_network( + vpn.bridge_interface_name, vpn.interface_name, + ip=host_vpn_ip, flush_ip_device=False) + + vpn.start_daemon() + + self.established_vpns[vpn_name] = vpn + + # Setup radvd for IPv6 auto-configuration + self.radvd.add_interface( + vpn.bridge_interface_name, vpn_network_prefix) + self.service_manager.restart_service(RADVD_SERVICE_NAME) + logging.debug( + f'Added device {vpn.bridge_interface_name} to radvd ' + f'with IPv6 address range {vpn_network_prefix}') + + # Create a new tap device for the VM to use + # TODO: Must be re-established after a host reboot + associated_tap_device = 'vpn-' + vm_id + success = networking.Iproute2Network.tap_device( + associated_tap_device, config.USER, vpn.bridge_interface_name) + if not success: + raise VpnException(f'Could not setup tap for VPN "{vpn_name}"') + logging.debug( + f'Created TAP device {associated_tap_device} for VM {vm_id}') + + return associated_tap_device + def get_process_for_vm(vm_id: str) -> Optional[psutil.Process]: for proc in psutil.process_iter(['name']): @@ -315,6 +409,7 @@ def callback(ch, method, properties, body, handler: ComputingHandler): 'response': response, } except Exception as e: + 
logging.exception('Unhandled exception') resp_message = { 'execution-info': { 'status': 'error', @@ -358,7 +453,14 @@ def run(): systemd_path = Path.home() / '.config/systemd/user' service_manager = services.SystemdServiceManager(systemd_path) - handler = ComputingHandler(service_manager) + radvd = aetherscale.vpn.radvd.Radvd( + config.AETHERSCALE_CONFIG_DIR / 'radvd.conf', config.VPN_48_PREFIX) + service_manager.install_simple_service( + radvd.get_start_command(), service_name=RADVD_SERVICE_NAME, + description='IPv6 Router Advertisment for VPNs') + service_manager.start_service(RADVD_SERVICE_NAME) + + handler = ComputingHandler(radvd, service_manager) bound_callback = lambda ch, method, properties, body: \ callback(ch, method, properties, body, handler) @@ -368,7 +470,7 @@ def run(): queue=COMPETING_QUEUE, on_message_callback=bound_callback) # a TAP interface for VDE must already have been created - if not interfaces.check_device_existence(VDE_TAP_INTERFACE): + if not networking.Iproute2Network.check_device_existence(VDE_TAP_INTERFACE): logging.error( f'Interface {VDE_TAP_INTERFACE} does not exist. 
' 'Please create it manually and then start this service again') diff --git a/aetherscale/config.py b/aetherscale/config.py @@ -1,11 +1,22 @@ import logging import os from pathlib import Path +import pwd +import socket LOG_LEVEL = os.getenv('LOG_LEVEL', default=logging.WARNING) +HOSTNAME = os.getenv('HOSTNAME', default=socket.gethostname()) + RABBITMQ_HOST = os.getenv('RABBITMQ_HOST', default='localhost') BASE_IMAGE_FOLDER = Path(os.getenv('BASE_IMAGE_FOLDER', default='base_images')) USER_IMAGE_FOLDER = Path(os.getenv('USER_IMAGE_FOLDER', default='user_images')) + +AETHERSCALE_CONFIG_DIR = Path.home() / '.config/aetherscale' +VPN_CONFIG_FOLDER = AETHERSCALE_CONFIG_DIR / 'tinc' +VPN_NUM_PREPARED_INTERFACES = 2 +VPN_48_PREFIX = 'fde7:2361:234a' + +USER = pwd.getpwuid(os.getuid()).pw_name diff --git a/aetherscale/interfaces.py b/aetherscale/interfaces.py @@ -1,74 +0,0 @@ -import logging -import random -import subprocess -from typing import Optional - -from . import execution - - -class NetworkException(Exception): - pass - - -def check_device_existence(device: str) -> bool: - # if ip link show dev [devicename] does not find [devicename], it will - # write a message to stderr, but none to stdout - result = subprocess.run( - ['ip', 'link', 'show', 'dev', device], stdout=subprocess.PIPE, - stderr=subprocess.DEVNULL) - - if result.stdout: - return True - else: - return False - - -def init_bridge( - bridge_device: str, phys_device: str, ip: Optional[str], - gateway: Optional[str]) -> bool: - if check_device_existence(bridge_device): - logging.debug( - f'Device {bridge_device} already exists, will not re-create') - return True - else: - logging.debug(f'Creating bridge device {bridge_device}') - - commands = [ - ['ip', 'link', 'add', bridge_device, 'type', 'bridge'], - ['ip', 'link', 'set', bridge_device, 'up'], - ['ip', 'link', 'set', phys_device, 'up'], - ['ip', 'link', 'set', phys_device, 'master', bridge_device], - ['ip', 'addr', 'flush', 'dev', phys_device], - ] - if 
ip: - commands.append( - ['ip', 'addr', 'add', ip, 'dev', bridge_device]) - if gateway: - commands.append( - ['ip', 'route', 'add', 'default', - 'via', gateway, 'dev', bridge_device]) - - return execution.run_command_chain(commands) - - -def create_tap_device( - tap_device_name, bridge_device_name, user) -> bool: - creation_ok = execution.run_command_chain([ - ['ip', 'tuntap', 'add', 'dev', tap_device_name, - 'mode', 'tap', 'user', user], - ['ip', 'link', 'set', 'dev', tap_device_name, 'up'], - ['ip', 'link', 'set', tap_device_name, 'master', bridge_device_name], - ]) - - return creation_ok - - -def create_mac_address() -> str: - # Set second least significant bit of leftmost pair to 1 (local) - # Set least significant bit of leftmost pair to 0 (unicast) - mac_bits = (random.getrandbits(48) | 0x020000000000) & 0xfeffffffffff - mac_str = '{:012x}'.format(mac_bits) - return ':'.join([ - mac_str[:2], mac_str[2:4], mac_str[4:6], - mac_str[6:8], mac_str[8:10], mac_str[10:], - ]) diff --git a/aetherscale/networking.py b/aetherscale/networking.py @@ -0,0 +1,134 @@ +import logging +import random +import re +import subprocess +from typing import Optional + +from aetherscale import execution + + +def create_mac_address() -> str: + # Set second least significant bit of leftmost pair to 1 (local) + # Set least significant bit of leftmost pair to 0 (unicast) + mac_bits = (random.getrandbits(48) | 0x020000000000) & 0xfeffffffffff + mac_str = '{:012x}'.format(mac_bits) + return ':'.join([ + mac_str[:2], mac_str[2:4], mac_str[4:6], + mac_str[6:8], mac_str[8:10], mac_str[10:], + ]) + + +class NetworkingException(Exception): + pass + + +class Iproute2Network: + @staticmethod + def bridged_network( + bridge_device: str, phys_device: str, + ip: Optional[str] = None, gateway: Optional[str] = None, + flush_ip_device: bool = True) -> bool: + Iproute2Network.validate_device_name(bridge_device) + Iproute2Network.validate_device_name(phys_device) + if ip: + 
Iproute2Network.validate_ip_address(ip) + if gateway: + Iproute2Network.validate_ip_address(gateway) + + Iproute2Network._create_bridge(bridge_device) + + commands = [ + ['sudo', 'ip', 'link', 'set', phys_device, 'up'], + ['sudo', 'ip', 'link', 'set', phys_device, 'master', bridge_device], + ['sudo', 'ip', 'addr', 'flush', 'dev', phys_device], + ] + + if ip: + if flush_ip_device: + commands.append( + ['sudo', 'ip', 'addr', 'flush', 'dev', bridge_device]) + commands.append( + ['sudo', 'ip', 'addr', 'add', ip, 'dev', bridge_device]) + if gateway: + commands.append( + ['sudo', 'ip', 'route', 'add', 'default', + 'via', gateway, 'dev', bridge_device]) + + return execution.run_command_chain(commands) + + @staticmethod + def tap_device( + tap_device_name: str, user: str, + bridge_device: Optional[str] = None): + Iproute2Network.validate_device_name(tap_device_name) + if bridge_device: + Iproute2Network.validate_device_name(bridge_device) + + if Iproute2Network.check_device_existence(tap_device_name): + logging.debug( + f'Device {tap_device_name} already exists, will not re-create') + return True + else: + logging.debug(f'Creating TAP device {tap_device_name}') + + commands = [ + ['sudo', 'ip', 'tuntap', 'add', 'dev', tap_device_name, + 'mode', 'tap', 'user', user], + ['sudo', 'ip', 'link', 'set', 'dev', tap_device_name, 'up'], + ] + + if bridge_device: + commands.append([ + 'sudo', 'ip', 'link', 'set', tap_device_name, + 'master', bridge_device, + ]) + + creation_ok = execution.run_command_chain(commands) + return creation_ok + + @staticmethod + def _create_bridge(bridge_device: str): + Iproute2Network.validate_device_name(bridge_device) + + if Iproute2Network.check_device_existence(bridge_device): + logging.debug( + f'Device {bridge_device} already exists, will not re-create') + return True + else: + logging.debug(f'Creating bridge device {bridge_device}') + + return execution.run_command_chain([ + ['sudo', 'ip', 'link', 'add', bridge_device, 'type', 'bridge'], + 
['sudo', 'ip', 'link', 'set', bridge_device, 'up'], + ]) + + @staticmethod + def check_device_existence(device: str) -> bool: + Iproute2Network.validate_device_name(device) + + # if ip link show dev [devicename] does not find [devicename], it will + # write a message to stderr, but none to stdout + result = subprocess.run( + ['ip', 'link', 'show', 'dev', device], stdout=subprocess.PIPE, + stderr=subprocess.DEVNULL) + + if result.stdout: + return True + else: + return False + + @staticmethod + def validate_device_name(name: str): + if len(name) == 0: + raise NetworkingException('Zero-length device name not allowed') + elif len(name) > 15: + raise NetworkingException('Device name must be max. 15 characters') + elif not re.match('^[a-z0-9-]+$', name): + raise NetworkingException( + f'Invalid name for network device provided ("{name}")') + + @staticmethod + def validate_ip_address(ip_addr: str): + if not re.match(r'^[0-9.:a-f]+(/\d+)?$', ip_addr): + raise NetworkingException( + f'Invalid IP address provided ({ip_addr})') diff --git a/aetherscale/qemu/runtime.py b/aetherscale/qemu/runtime.py @@ -1,14 +1,35 @@ +from dataclasses import dataclass import enum import logging import json from pathlib import Path import random import socket -from typing import Any, Dict, Optional +from typing import Any, Dict, Optional, List from aetherscale.qemu.exceptions import QemuException +class QemuInterfaceType(enum.Enum): + TAP = enum.auto() + VDE = enum.auto() + + +@dataclass +class QemuInterfaceConfig: + mac_address: str + type: QemuInterfaceType + vde_folder: Optional[Path] = None + tap_device: Optional[str] = None + + +@dataclass +class QemuStartupConfig: + vm_id: str + hda_image: Path + interfaces: List[QemuInterfaceConfig] + + class QemuProtocol(enum.Enum): QMP = enum.auto() QGA = enum.auto() diff --git a/aetherscale/services.py b/aetherscale/services.py @@ -2,6 +2,7 @@ from abc import ABC, abstractmethod from pathlib import Path import shutil import subprocess +from typing 
import Optional from aetherscale.execution import run_command_chain @@ -12,6 +13,14 @@ class ServiceManager(ABC): """Installs a service on the system for possible activation""" @abstractmethod + def install_simple_service( + self, command: str, service_name: str, + description: Optional[str] = None) -> bool: + """Installs a simple service for a binary. This function allows us + to make service manager easy to replace, because unlike install_service + it does not need a service-specific configuration file as input.""" + + @abstractmethod def uninstall_service(self, service_name: str) -> bool: """Removes a service from the system once it's no longer needed""" @@ -24,6 +33,10 @@ class ServiceManager(ABC): """Stop a service""" @abstractmethod + def restart_service(self, service_name: str) -> bool: + """Restart a service""" + + @abstractmethod def enable_service(self, service_name: str) -> bool: """Enable a service so that it will be auto-started on reboots""" @@ -56,7 +69,33 @@ class SystemdServiceManager(ServiceManager): except OSError: return False - # Reload system + # Reload systemd + r = subprocess.run(['systemctl', '--user', 'daemon-reload']) + return r.returncode == 0 + + def install_simple_service( + self, command: str, service_name: str, + description: Optional[str] = None) -> bool: + if '.' not in service_name: + raise ValueError('Unit name must contain the suffix, e.g. 
.service') + + target_unit_path = self._systemd_unit_path(service_name) + target_unit_path.parent.mkdir(parents=True, exist_ok=True) + + if not description: + description = f'aetherscale {service_name}' + + with open(target_unit_path, 'wt') as f: + f.write('[Unit]\n') + f.write(f'Description={description}\n') + f.write('\n') + f.write('[Service]\n') + f.write(f'ExecStart={command}\n') + f.write('\n') + f.write('[Install]\n') + f.write('WantedBy=default.target\n') + + # Reload systemd r = subprocess.run(['systemctl', '--user', 'daemon-reload']) return r.returncode == 0 @@ -80,6 +119,11 @@ class SystemdServiceManager(ServiceManager): ['systemctl', '--user', 'stop', service_name], ]) + def restart_service(self, service_name: str) -> bool: + return run_command_chain([ + ['systemctl', '--user', 'restart', service_name], + ]) + def enable_service(self, service_name: str) -> bool: return run_command_chain([ ['systemctl', '--user', 'enable', service_name], diff --git a/aetherscale/vpn/radvd.py b/aetherscale/vpn/radvd.py @@ -0,0 +1,72 @@ +import os +from pathlib import Path +import tempfile + +import aetherscale.config + + +CONFIG_BLOCK = '''interface INTERFACE { + AdvSendAdvert on; + MinRtrAdvInterval 3; + MaxRtrAdvInterval 10; + prefix PREFIX { + AdvOnLink on; + AdvAutonomous on; + AdvRouterAddr off; + }; +};''' + + +class RadvdException(Exception): + pass + + +class Radvd: + def __init__(self, config_file: Path, prefix: str): + if prefix.count(':') != 2: + raise RadvdException('Prefix must be a /48 prefix') + + self.config_file = config_file + self.prefix = prefix + + # This is a poor man's method to check prefix overlap; we only + # check for duplicate prefixes + self.assigned_prefixes = set() + + # Create an empty configuration file + if self.config_file.is_file(): + os.chmod(self.config_file, 0o600) + + with open(self.config_file, 'wt') as f: + f.write('') + + def generate_prefix(self): + if len(self.assigned_prefixes) >= 65536: + raise RadvdException('Max number of 
available networks reached') + + return self.prefix + ':' + str(len(self.assigned_prefixes)) + '::/64' + + def add_interface(self, interface_name: str, prefix: str): + if prefix in self.assigned_prefixes: + raise RadvdException(f'Prefix "{prefix}" was already assigned') + + config_block = CONFIG_BLOCK \ + .replace('INTERFACE', interface_name) \ + .replace('PREFIX', prefix) + + # Radvd forces us to have read-only permissions on the file. + # To be able to edit it, we have to alter permissions and change them + # back after our changes + os.chmod(self.config_file, 0o600) + + with open(self.config_file, 'at') as f: + f.write('\n\n' + config_block) + + self.assigned_prefixes.add(prefix) + + os.chmod(self.config_file, 0o400) + + def get_start_command(self): + pidfile = Path(tempfile.gettempdir()) / 'radvd.pid' + return f'/usr/bin/sudo /usr/bin/radvd -n -C {self.config_file} ' \ + f'-u {aetherscale.config.USER} -p {str(pidfile)}' diff --git a/aetherscale/vpn/tinc.py b/aetherscale/vpn/tinc.py @@ -0,0 +1,133 @@ +import logging +import os +from pathlib import Path +import re +import shlex +import shutil +import subprocess +import tempfile +from typing import Optional + +from aetherscale.services import ServiceManager + + +class VpnException(Exception): + pass + + +class TincVirtualNetwork(object): + def __init__( + self, netname: str, config_folder: Path, + service_manager: ServiceManager): + if not self._validate_netname(netname): + raise ValueError( + f'Invalid name for network provided ("{netname}")') + + self.netname = netname + self.config_base_folder = config_folder + self.service_manager = service_manager + # TODO: To support multi VPN each VPN has to use another port + self.port = 20000 + + self.pidfile = Path(tempfile.gettempdir()) / f'tincd-{self.netname}.run' + + def network_exists(self) -> bool: + return self._net_config_folder().is_dir() + + @property + def interface_name(self): + return f'tinc-{self.netname}' + + @property + def bridge_interface_name(self): + 
return f'tincbr-{self.netname}' + + def create_config(self, hostname: str): + if not re.match('^[a-z0-9]+$', hostname): + raise ValueError(f'Invalid hostname provided ("{hostname}")') + + config_dir = self._net_config_folder() + config_dir.mkdir(parents=True, exist_ok=True) + + with open(config_dir / 'tinc.conf', 'w') as f: + lines = [ + f'Name = {hostname}\n', + 'Mode = switch\n', + f'Interface = {self.interface_name}\n', + f'Port = {self.port}\n', + ] + f.writelines(lines) + + self._create_host(hostname, public_ip=None, pubkey=None) + + def add_peer(self, hostname: str, public_ip: str, pubkey: str): + self._create_host(hostname, public_ip, pubkey) + + with open(self._net_config_folder() / 'tinc.conf', 'a') as f: + f.write(f'ConnectTo = {hostname}') + + def _create_host( + self, hostname: str, public_ip: Optional[str], + pubkey: Optional[str]): + hosts_dir = self._net_config_folder() / 'hosts' + os.makedirs(hosts_dir, exist_ok=True) + + with open(hosts_dir / hostname, 'w') as f: + if public_ip: + f.write(f'Address = {public_ip}\n') + + if pubkey: + f.write('\n') + f.write(pubkey) + + def gen_keypair(self): + logging.debug('Generating key pair for tinc') + subprocess.run( + ['tincd', '-K', '-c', self._net_config_folder()], + stdin=subprocess.DEVNULL) + logging.debug('Finished generating key pair') + + def _validate_netname(self, netname: str): + if not re.match('^[a-z0-9]+$', netname): + return False + if len(netname) > 8: + return False + + return True + + def _net_config_folder(self) -> Path: + return self.config_base_folder / self.netname + + def _service_name(self) -> str: + return f'aetherscale-tincd-{self.netname}.service' + + def start_daemon(self): + net_dir_quoted = shlex.quote(str(self._net_config_folder())) + pidfile_quoted = shlex.quote(str(self.pidfile)) + + service_name = self._service_name() + with tempfile.NamedTemporaryFile('wt') as f: + f.write('[Unit]\n') + f.write(f'Description=aetherscale {self.netname} VPN with tincd\n') + f.write('\n') + 
f.write('[Service]\n') + f.write( + f'ExecStart=tincd -D -c {net_dir_quoted} ' + f'--pidfile {pidfile_quoted}\n') + f.write('\n') + f.write('[Install]\n') + f.write('WantedBy=default.target\n') + + f.flush() + + logging.debug(f'Installing tinc VPN service "{service_name}"') + self.service_manager.install_service(Path(f.name), service_name) + + self.service_manager.enable_service(service_name) + success = self.service_manager.start_service(service_name) + if not success: + raise VpnException(f'Could not establish VPN "{self.netname}"') + + def teardown_tinc_config(self): + self.service_manager.uninstall_service(self._service_name()) + shutil.rmtree(self._net_config_folder()) diff --git a/bin/setup-vpn-tap-vde.sh b/bin/setup-vpn-tap-vde.sh @@ -0,0 +1,47 @@ +#!/usr/bin/env bash + +usage() { + echo "Usage: $0 -u USER -n NUM-DEVICES -p IPv6-48-PREFIX" +} + +while getopts ":hu:n:p:" opt; do + case "$opt" in + h|\?) + usage + exit 0 + ;; + u) user=$OPTARG + ;; + n) num_devices=$OPTARG + ;; + p) prefix=$OPTARG + ;; + esac +done + +if [[ -z $user || -z $num_devices || -z $prefix]]; then + usage + echo + echo "Please specify all required arguments" + exit 1 +fi + +for i in $(seq 1 $num_devices); do + bridge_name=aeth-vpnbr-$i + tinc_name=aeth-vpntnc-$i + vde_name=aeth-vpnvde-$i + + ip link add $bridge_name type bridge + ip link set $bridge_name up + + ip tuntap add dev $tinc_name mode tap user $user + ip link set $tinc_name up + ip link set $tinc_name master $bridge_name + ip addr flush dev $tinc_name + + ip tuntap add dev $vde_name mode tap user $user + ip link set dev $vde_name up + ip link set $vde_name master $bridge_name + + ip addr add $prefix:$i::1/64 dev $bridge_name +done diff --git a/tests/conftest.py b/tests/conftest.py @@ -1,6 +1,6 @@ from pathlib import Path - import pytest +from typing import Optional from aetherscale.services import ServiceManager import aetherscale.timing @@ -28,6 +28,12 @@ def mock_service_manager(): self.services.add(service_name) return 
True + def install_simple_service( + self, command: str, service_name: str, + description: Optional[str] = None) -> bool: + self.services.add(service_name) + return True + def uninstall_service(self, service_name: str) -> bool: try: self.services.remove(service_name) @@ -50,6 +56,9 @@ def mock_service_manager(): return True + def restart_service(self, service_name: str) -> bool: + return True + def enable_service(self, service_name: str) -> bool: self.enabled_services.add(service_name) return True diff --git a/tests/test_computing.py b/tests/test_computing.py @@ -36,7 +36,9 @@ def test_vm_lifecycle(tmppath, mock_service_manager: ServiceManager): with mock.patch('aetherscale.config.BASE_IMAGE_FOLDER', tmppath), \ mock.patch('aetherscale.config.USER_IMAGE_FOLDER', tmppath): - handler = computing.ComputingHandler(mock_service_manager) + handler = computing.ComputingHandler( + radvd=mock.MagicMock(), service_manager=mock_service_manager) + with base_image(tmppath) as img: result = handler.create_vm({'image': img.stem}) vm_id = result['vm-id'] @@ -65,7 +67,8 @@ def test_run_missing_base_image(tmppath, mock_service_manager: ServiceManager): with mock.patch('aetherscale.config.BASE_IMAGE_FOLDER', tmppath), \ mock.patch('aetherscale.config.USER_IMAGE_FOLDER', tmppath): - handler = computing.ComputingHandler(mock_service_manager) + handler = computing.ComputingHandler( + radvd=mock.MagicMock(), service_manager=mock_service_manager) # specify invalid base image with pytest.raises(OSError): diff --git a/tests/test_interfaces.py b/tests/test_interfaces.py @@ -1,8 +0,0 @@ -from aetherscale.interfaces import create_mac_address - - -def test_mac_address_is_random(): - mac_a = create_mac_address() - mac_b = create_mac_address() - - assert mac_a != mac_b diff --git a/tests/test_networking.py b/tests/test_networking.py @@ -0,0 +1,37 @@ +import pytest + +from aetherscale import networking + + +def test_mac_address_is_random(): + mac_a = networking.create_mac_address() + mac_b = 
networking.create_mac_address() + + assert mac_a != mac_b + + +def test_device_name_validation(): + # must not raise exception + networking.Iproute2Network.validate_device_name('valid-dev') + networking.Iproute2Network.validate_device_name('qemu-tap-10') + networking.Iproute2Network.validate_device_name('fifteen-chars15') + + with pytest.raises(networking.NetworkingException): + networking.Iproute2Network.validate_device_name('too-long-device-name') + + with pytest.raises(networking.NetworkingException): + networking.Iproute2Network.validate_device_name('invalid space') + + with pytest.raises(networking.NetworkingException): + networking.Iproute2Network.validate_device_name('non-ascii-日本') + + +def test_ip_address_validation(): + # must not raise exception + networking.Iproute2Network.validate_ip_address('10.0.0.1') + networking.Iproute2Network.validate_ip_address('2001:0db8::3b:0:1') + networking.Iproute2Network.validate_ip_address('10.0.0.1/32') + networking.Iproute2Network.validate_ip_address('2001:0db8::/64') + + with pytest.raises(networking.NetworkingException): + networking.Iproute2Network.validate_ip_address('something-invalid') diff --git a/tests/test_radvd.py b/tests/test_radvd.py @@ -0,0 +1,47 @@ +import os +import pytest +import stat + +from aetherscale.vpn import radvd + + +def test_add_interface_config(tmppath): + r = radvd.Radvd(tmppath / 'radvd.conf', '2001:0db8:0') + r.add_interface('my-interface', '2001:0db8:0::/64') + r.add_interface('my-second-interface', '2001:0db8:1::/64') + + with open(r.config_file) as f: + content = f.read() + + assert 'my-interface' in content + assert '001:0db8::/64' + assert 'my-second-interface' in content + + +def test_cannot_assign_same_prefix_twice(tmppath): + r = radvd.Radvd(tmppath / 'radvd.conf', '2001:0db8:0') + r.add_interface('first', '2001:0db8::/64') + + with pytest.raises(radvd.RadvdException): + r.add_interface('second', '2001:0db8::/64') + + +def test_generate_next_prefix(tmppath): + r = 
radvd.Radvd(tmppath / 'radvd.conf', prefix='2001:0db8:0') + prefix = r.generate_prefix() + r.add_interface('interface', prefix) + prefix2 = r.generate_prefix() + + assert prefix != prefix2 + + +def test_config_is_readonly(tmppath): + r = radvd.Radvd(tmppath / 'radvd.conf', '2001:0db8:0') + r.add_interface('some-interface', '::/64') + + assert stat.S_IMODE(os.lstat(r.config_file).st_mode) == 0o400 + + +def test_drops_privileges(tmppath): + r = radvd.Radvd(tmppath / 'radvd.conf', '2001:0db8:0') + assert '-u' in r.get_start_command() diff --git a/tests/test_services.py b/tests/test_services.py @@ -29,6 +29,7 @@ def test_systemd_calls_system_binary(subprocess_run, tmppath): (systemd.disable_service, 'disable'), (systemd.start_service, 'start'), (systemd.stop_service, 'stop'), + (systemd.restart_service, 'restart'), (systemd.service_is_running, 'is-active'), ]