Source code for fnss.topologies.parsers

"""Functions to parse topologies from datasets or from other generators."""
import re
import math

import networkx as nx

from fnss.topologies.topology import Topology, DirectedTopology
from fnss.util import geographical_distance


__all__ = [
    'parse_rocketfuel_isp_map',
    'parse_rocketfuel_isp_latency',
    'parse_caida_as_relationships',
    'parse_inet',
    'parse_abilene',
    'parse_brite',
    'parse_topology_zoo',
    'parse_ashiip'
          ]


# Parser for RocketFuel ISP (router-level) maps .cch files
[docs]def parse_rocketfuel_isp_map(path): """ Parse a network topology from RocketFuel ISP map file. The ASes provided by the RocketFuel dataset are the following: +------+---------------------+-------+--------+------------+------------+ | ASN | Name | Span | Region | Nodes (r1) | Nodes (r0) | +======+=====================+=======+========+============+============+ | 1221 | Telstra (Australia) | world | AUS | 2999 | 378 (318) | | 1239 | Sprintlink (US) | world | US | 8352 | 700 (604) | | 1755 | EBONE (Europe) | world | Europe | 609 | 172 | | 2914 | Verio (US) | world | US | 7109 | 1013 | | 3257 | Tiscali (Europe) | world | Europe | 855 | 248 (240) | | 3356 | Level 3 (US) | world | US | 3447 | 652 | | 3967 | Exodus (US) | world | US | 917 | 215 (201) | | 4755 | VSNL (India) | world | India | 121 | 12 | | 6461 | Abovenet (US) | world | US | 0 | 202 | | 7018 | AT&T (US) | world | US | 10152 | 656 (631) | +------+---------------------+-------+--------+------------+------------+ Parameters ---------- path : str The path of the file containing the RocketFuel map. It should have extension .cch Returns ------- topology : DirectedTopology The object containing the parsed topology. Notes ----- The returned topology is always directed. If an undirected topology is desired, convert it using the DirectedTopology.to_undirected() method. Each node of the returned graph has the following attributes: * **type**: string * **location**: string (optional) * **address**: string * **r**: int * **backbone**: boolean (optional) Each edge of the returned graph has the following attributes: * type : string, which can either be *internal* or *external* If the topology contains self-loops (links starting and ending in the same node) they are stripped from the topology. Raises ------ ValueError If the provided file cannot be parsed correctly. Examples -------- >>> import fnss >>> topology = fnss.parse_rocketfuel_isp_map('1221.r0.cch') """ topology = DirectedTopology(type='rocket_fuel') comment_char = '#' with open(path, "r") as f: for line in f.readlines(): if comment_char in line: # split on comment char, keep only the part before line, _ = line.split(comment_char, 1) line = line.strip() if len(line) == 0: continue # Parse line. if line.startswith("-"): # Case external node # -euid =externaladdress rn try: node = int(re.findall("-\d+", line)[0]) address = (re.findall("=\S+", line)[0])[1:] # .strip("=") r = int(re.findall("r\d$", line)[0][1:]) # .strip("r")) except IndexError: raise ValueError('Invalid input file. Parsing failed '\ 'while trying to parse an external node') topology.add_node(node, type='external', address=address, r=r) else: # Case internal node # uid @loc [+] [bb] (num_neigh) [&ext] -> <nuid-1> <nuid-2> # ... {-euid} ... =name[!] rn try: node = int(re.findall("\d+", line)[0]) node_location = re.findall("@\S*", line)[0] node_location = re.sub("[\+@]", "", node_location) r = int(re.findall("r\d$", line)[0][1:]) # .strip("r")) address = (re.findall("=\S+", line)[0])[1:] # .strip("=") except IndexError: raise ValueError('Invalid input file. Parsing failed '\ 'while trying to parse an internal node') internal_links = re.findall("<(\d+)>", line) external_links = re.findall("{(-?\d+)}", line) backbone = (len(re.findall("\sbb\s", line)) > 0) topology.add_node(node, type='internal', location=node_location, address=address, r=r, backbone=backbone) for link in internal_links: link = int(link) if node != link: topology.add_edge(node, link, type='internal') for link in external_links: link = int(link) if node != link: topology.add_edge(node, link, type='external') return topology
[docs]def parse_rocketfuel_isp_latency(latencies_path, weights_path=None): """ Parse a network topology from RocketFuel ISP topology file (latency.intra) with inferred link latencies and optionally annotate the topology with inferred weights (weights.infra). The ASes provided by the RocketFuel dataset are the following: +------+---------------------+-------+--------+-------+-------------------+ | ASN | Name | Span | Region | Nodes | Lrgst conn. comp. | +======+=====================+=======+========+=======+===================+ | 1221 | Telstra (Australia) | world | AUS | 108 | 104 | | 1239 | Sprintlink (US) | world | US | 315 | 315 | | 1755 | EBONE (Europe) | world | Europe | 87 | 87 | | 3257 | Tiscali (Europe) | world | Europe | 161 | 161 | | 3967 | Exodus (US) | world | US | 79 | 79 | | 6461 | Abovenet (US) | world | US | 141 | 138 | +------+---------------------+-------+--------+-------+-------------------+ Parameters ---------- latencies_path : str The path of the file containing the RocketFuel latencies file. It should have extension .intra weights_path : str, optional The path of the file containing the RocketFuel weights file. It should have extension .intra Returns ------- topology : DirectedTopology The object containing the parsed topology. Notes ----- The returned topology is directed. It can be converted using the DirectedTopology.to_undirected() method if an undirected topology is desired. Each node of the returned graph has the following attributes: * **name**: string * **location**: string Each edge of the returned graph has the following attributes: * **delay** : int * **wdights** : float (only if a weights file was specified) Raises: ------- ValueError If the provided file cannot be parsed correctly Examples -------- >>> import fnss >>> topology = fnss.parse_rocketfuel_isp_latency('1221.latencies.intra') """ topology = DirectedTopology(type='rocket_fuel', delay_unit='ms') comment_char = '#' node_dict = dict() node_count = 0 with open(latencies_path, "r") as f: for line in f.readlines(): if comment_char in line: # split on comment char, keep only the part before line, _ = line.split(comment_char, 1) line = line.strip() if len(line) == 0: continue u_str, v_str, delay = line.split() try: # Edges endpoints and delay are separated by a space. # An edge endpoint generally has the format <location>,<router-name> # but there are some endpoints which don't, e.g. London4083 in # topology 1239. This function tries first to parse by splitting # at the comma. If it fails, then separates number from location. # If this also fails, it just keeps the node name as it is. try: u_location, u_name = u_str.split(',') except ValueError: match = re.match(r"([a-z]+)([0-9]+)", u_str, re.I) if match: u_location, u_name = match.groups() else: u_location = u_name = u_str try: v_location, v_name = v_str.split(',') except ValueError: match = re.match(r"([a-z]+)([0-9]+)", v_str, re.I) if match: v_location, v_name = match.groups() else: v_location = v_name = v_str if delay.isdigit(): delay = int(delay) else: raise ValueError('Invalid delay value: %s' % delay) except ValueError: raise ValueError('Invalid latencies file. Parsing failed '\ 'while trying to parse an edge') if u_str not in node_dict: node_dict[u_str] = node_count topology.add_node(node_count, location=u_location, name=u_name) node_count += 1 if v_str not in node_dict: node_dict[v_str] = node_count topology.add_node(node_count, location=v_location, name=v_name) node_count += 1 u = node_dict[u_str] v = node_dict[v_str] topology.add_edge(u, v, delay=delay) if weights_path: with open(weights_path, "r") as f: for line in f.readlines(): if comment_char in line: # split on comment char, keep only the part before line, _ = line.split(comment_char, 1) line = line.strip() if len(line) == 0: continue try: u_str, v_str, weight = line.split() except ValueError: raise ValueError('Invalid weight file. Parsing failed '\ 'while trying to parse an edge') try: weight = float(weight) except ValueError: raise ValueError('Invalid weight value: %s' % weight) try: u = node_dict[u_str] v = node_dict[v_str] except KeyError: raise ValueError("The weight file includes edge (%s, %s), " "which was not included in the latencies file" % (u_str, v_str)) topology.adj[u][v]['weight'] = weight return topology
[docs]def parse_caida_as_relationships(path): """ Parse a topology from the CAIDA AS relationships dataset Parameters ---------- path : str The path to the CAIDA AS relationships file Returns ------- topology : DirectedTopology Notes ----- The node names of the returned topology are the the ASN of the of the AS they represent and edges are annotated with the relationship between ASes they connect. The relationship values can either be *customer*, *peer* or *sibling*. References ---------- http://www.caida.org/data/active/as-relationships/ http://as-rank.caida.org/data/ """ topology = DirectedTopology(type='caida_as_relationships') comment_char = '#' relationships_dict = {-1: 'customer', 0: 'peer', 2: 'sibling'} with open(path, "r") as f: for line in f.readlines(): if comment_char in line: # split on comment char, keep only the part before line, _ = line.split(comment_char, 1) line = line.strip() if len(line) == 0: continue entry = line.split('|') try: from_as = int(entry[0]) to_as = int(entry[1]) relationship = relationships_dict[int(entry[2])] except (ValueError, IndexError, KeyError): raise ValueError('Invalid input file. Parsing failed while trying'\ ' to parse a line') if from_as != to_as: topology.add_edge(from_as, to_as, type=relationship) return topology
[docs]def parse_inet(path): """ Parse a topology from an output file generated by the Inet topology generator Parameters ---------- path : str The path to the Inet output file Returns ------- topology : Topology Notes ----- Each node of the returned topology object is labeled with *latitude* and *longitude* attributes. These attributes are not expressed in degrees but in Kilometers. """ topology = Topology(type='inet', distance_unit='Km') with open(path, "r") as f: lines = f.readlines() sep = re.compile('[\s\t]') first_line = sep.split(lines[0].strip()) try: n_nodes = int(first_line[0]) n_links = int(first_line[1]) except (ValueError, IndexError): raise ValueError('Invalid input file. '\ 'Cannot parse the number of nodes and links') if len(lines) != 1 + n_nodes + n_links: raise ValueError('Invalid input file. '\ 'It does not have as many lines as expected') i = 0 for line in lines[1:]: entry = sep.split(line.strip()) if i < n_nodes: i += 1 try: node_id = int(entry[0]) longitude = int(entry[1]) latitude = int(entry[2]) except (ValueError, IndexError): raise ValueError('Invalid input file. Parsing failed while '\ 'trying to parse a node') topology.add_node(node_id, latitude=latitude, longitude=longitude) else: try: u = int(entry[0]) v = int(entry[1]) weight = int(entry[2]) x_u = topology.node[u]['longitude'] y_u = topology.node[u]['latitude'] x_v = topology.node[v]['longitude'] y_v = topology.node[v]['latitude'] length = float(math.sqrt((x_v - x_u) ** 2 + (y_v - y_u) ** 2)) except (ValueError, IndexError): raise ValueError('Invalid input file. Parsing failed while '\ 'trying to parse a link') topology.add_edge(u, v, weight=weight, length=length) return topology
# Ignore external links # Node parameters: city, latitude, longitude # Link parameters: capacity, weight[, link_index, link_type]
[docs]def parse_abilene(topology_path, links_path=None): """ Parse the Abilene topology. Parameters ---------- topology_path : str The path of the Abilene topology file links_path : str, optional The path of the Abilene links file Returns ------- topology : DirectedTopology """ topology = DirectedTopology(type='abilene', capacity_unit='kbps', distance_unit='Km') comment_char = '#' link_type_dict = {0: 'internal', 1: 'inbound', 2: 'outbound'} line_type = None with open(topology_path, "r") as f: for line in f.readlines(): if comment_char in line: # split on comment char, keep only the part before line, _ = line.split(comment_char, 1) line = line.strip() if len(line) > 0: if line == 'router' or line == 'link': line_type = line continue if line_type == 'router': node_entry = line.split('\t') try: name = node_entry[0] city = node_entry[1] latitude = float(node_entry[2]) longitude = float(node_entry[3]) except (ValueError, IndexError): raise ValueError('Invalid input file. Parsing failed '\ 'while trying to parse a router') topology.add_node(name, city=city, latitude=latitude, longitude=longitude) elif line_type == 'link': sep = re.compile('[\s\t]') link_entry = sep.split(line) try: u = link_entry[0] v = link_entry[1] capacity = int(link_entry[2]) lon_u = topology.node[u]['longitude'] lat_u = topology.node[u]['latitude'] lon_v = topology.node[v]['longitude'] lat_v = topology.node[v]['latitude'] length = geographical_distance(lat_v, lon_v, lat_u, lon_u) weight = int(link_entry[3]) except (ValueError, IndexError): raise ValueError('Invalid input file. Parsing failed '\ 'while trying to parse a link') topology.add_edge(u, v, capacity=capacity, weight=weight, length=length) else: raise ValueError('Invalid input file. Found a line that '\ 'I cannot interpret') if links_path: with open(links_path, "r") as f: for line in f.readlines(): if comment_char in line: # split on comment char, keep only the part before line, _ = line.split(comment_char, 1) line = line.strip() if len(line) > 0: sep = re.compile('[\s\t]') link_entry = sep.split(line) try: u, v = link_entry[0].split(',', 1) if u == '*' or v == '*': # ignore external links continue link_index = int(link_entry[1]) link_type = link_type_dict[int(link_entry[2])] except (ValueError, IndexError): raise ValueError('Invalid input file. '\ 'Parsing failed while trying to '\ 'parse a link from links_file') topology.adj[u][v]['link_index'] = link_index topology.adj[u][v]['link_type'] = link_type return topology
[docs]def parse_brite(path, capacity_unit='Mbps', delay_unit='ms', distance_unit='Km', directed=True): """ Parse a topology from an output file generated by the BRITE topology generator Parameters ---------- path : str The path to the BRITE output file capacity_unit : str, optional The unit in which link capacity values are expresses in the BRITE file delay_unit : str, optional The unit in which link delay values are expresses in the BRITE file distance_unit : str, optional The unit in which node coordinates are expresses in the BRITE file directed : bool, optional If True, the topology is parsed as directed topology. Returns ------- topology : Topology or DirectedTopology Notes ----- Each node of the returned topology object is labeled with *latitude* and *longitude* attributes. These attributes are not expressed in degrees but in *distance_unit*. """ # BRITE output format: # http://www.cs.bu.edu/brite/user_manual/node29.html topology = DirectedTopology() if directed else Topology() topology.graph = {'type':'brite', 'capacity_unit':capacity_unit, 'delay_unit':delay_unit, 'distance_unit': distance_unit} line_type = None with open(path, "r") as f: for line in f.readlines(): if line.startswith('Nodes:'): line_type = 'node' elif line.startswith('Edges:'): line_type = 'edge' elif line[0].isdigit(): elements = line.strip().split("\t") if line_type == 'node': # Parse node try: node_id = int(elements[0]) longitude = float(elements[1]) latitude = float(elements[2]) # indegree = int(elements[3]) # outdegree = int(elements[4]) as_id = int(elements[5]) # Node type can be: # AS-only: AS_NODE # Router-only: RT_NODE # Top-down: RT_NODE, RT_BORDER # Bottom-up: RT_NODE node_type = elements[6] except (ValueError, IndexError): raise ValueError('Invalid input file. Parsing failed '\ 'while trying to parse a node') topology.add_node(node_id, latitude=latitude, longitude=longitude, type=node_type) if as_id > 0: topology.node[node_id]['AS'] = as_id elif line_type == 'edge': # Parse link try: edge_id = int(elements[0]) from_node = int(elements[1]) to_node = int(elements[2]) length = float(elements[3]) delay = float(elements[4]) capacity = float(elements[5]) # from_as = elements[6] # to_as = elements[7] # Link type can be: # AS-only: E_AS # Router-only: E_RT # Top-down: E_AS, E_RT # bottom-up: E_RT link_type = elements[8] except (ValueError, IndexError): raise ValueError('Invalid input file. Parsing failed '\ 'while trying to parse a link') topology.add_edge(from_node, to_node, id=edge_id, length=length, delay=delay, capacity=capacity, type=link_type) else: continue return topology
[docs]def parse_topology_zoo(path): """ Parse a topology from the Topology Zoo dataset. Parameters ---------- path : str The path to the Topology Zoo file Returns ------- topology : Topology or DirectedTopology The parsed topology. Notes ----- If the parsed topology contains bundled links, i.e. multiple links between the same pair or nodes, the topology is parsed correctly but each bundle of links is represented as a single link whose capacity is the sum of the capacities of the links of the bundle (if capacity values were provided). The returned topology has a boolean attribute named *link_bundling* which is True if the topology contains at list one bundled link or False otherwise. If the topology contains bundled links, then each link has an additional boolean attribute named *bundle* which is True if that specific link was bundled in the original topology or False otherwise. """ def try_convert_int(value): """ Try to convert a string to an int. If not possible, returns the given value unchanged """ if type(value) != int: try: value = int(value) except ValueError: pass return value if path.endswith('.gml'): topo_zoo_graph = nx.read_gml(path) elif path.endswith('.graphml'): topo_zoo_graph = nx.read_graphml(path) else: raise ValueError('Invalid input file format. It must either be a GML '\ 'or GraphML file (with extensions .gml or .graphml)') topology = DirectedTopology() if topo_zoo_graph.is_directed() \ else Topology() topology.graph['type'] = 'topology_zoo' topology.graph['distance_unit'] = 'Km' topology.graph['link_bundling'] = topo_zoo_graph.is_multigraph() for tv in topo_zoo_graph.nodes(): v = try_convert_int(tv) topology.add_node(v) if 'label' in topo_zoo_graph.node[tv]: topology.node[v]['label'] = topo_zoo_graph.node[tv]['label'] try: longitude = topo_zoo_graph.node[tv]['Longitude'] latitude = topo_zoo_graph.node[tv]['Latitude'] topology.node[v]['longitude'] = longitude topology.node[v]['latitude'] = latitude except KeyError: pass for tv, tu in topo_zoo_graph.edges(): v = try_convert_int(tv) u = try_convert_int(tu) if u == v: continue topology.add_edge(v, u) if 'Latitude' in topo_zoo_graph.node[tv] and \ 'Longitude' in topo_zoo_graph.node[tv] and \ 'Latitude' in topo_zoo_graph.node[tu] and \ 'Longitude' in topo_zoo_graph.node[tu]: lat_v = topo_zoo_graph.node[tv]['Latitude'] lon_v = topo_zoo_graph.node[tv]['Longitude'] lat_u = topo_zoo_graph.node[tu]['Latitude'] lon_u = topo_zoo_graph.node[tu]['Longitude'] length = geographical_distance(lat_v, lon_v, lat_u, lon_u) topology.adj[v][u]['length'] = length if topo_zoo_graph.is_multigraph(): edge = topo_zoo_graph.adj[tv][tu] topology.adj[v][u]['bundle'] = True if len(edge) > 1 else False capacity = 0 for edge_attr in list(edge.values()): if 'LinkSpeedRaw' in edge_attr: capacity += edge_attr['LinkSpeedRaw'] if capacity > 0: topology.adj[v][u]['capacity'] = capacity else: if 'LinkSpeedRaw' in topo_zoo_graph.adj[tv][tu]: topology.adj[v][u]['capacity'] = \ topo_zoo_graph.adj[tv][tu]['LinkSpeedRaw'] if len(nx.get_edge_attributes(topology, 'capacity')) > 0: topology.graph['capacity_unit'] = 'bps' return topology
[docs]def parse_ashiip(path): """ Parse a topology from an output file generated by the aShiip topology generator Parameters ---------- path : str The path to the aShiip output file Returns ------- topology : Topology """ topology = Topology(type='ashiip') with open(path, "r") as f: for line in f.readlines(): # There is no documented aShiip format but we assume that if the line # does not start with a number it is not part of the topology if line[0].isdigit(): node_ids = re.findall("\d+", line) if len(node_ids) < 3: raise ValueError('Invalid input file. Parsing failed while '\ 'trying to parse a line') node = int(node_ids[0]) level = int(node_ids[1]) topology.add_node(node, level=level) for i in range(2, len(node_ids)): topology.add_edge(node, int(node_ids[i])) return topology