#!/pkg/bin/perl -w

# $Id: parse_dot.pl,v 1.18 1997/02/01 20:14:06 hoagland Exp $

require "parse.pl";
require "attr.pl";
package parse_dot;

# This file contains operations to break down some text that is in the
# format used to encode graphs in GrIDS data packets.  This is
# DOT-like in appearance but is more restricted and contains encodings
# that DOT doesn't know about.  The routines here have simple
# operations to allow them to be used in different ways and without
# dealing with too complex of data structures.


# get_graphs returns the graphs encoded in the given text.  If the
# text encloses multiple graphs, then those are extracted otherwise
# just the one graph is returned.  The graphs are returned as text.
#
# get_graphs: text -> graph_text x graph_text x ...
#
# e.g. @graphs= &parse_dot::get_graphs(" ... ");
#
sub get_graphs {
  my($text)= shift;
  $_= $text;

  s/^\s*(di|sub)graph\s+("[^"]*"|[^\{\s]+)\s*\{//i;
  s/\}\s*$//;
  
  my(@parts)= &::split_outside_quotes('}',$_);
  pop(@parts) if $parts[$#parts] =~ /^;?\s*$/; # yuck! leftovers
  return $text if (@parts == 1 && $parts[0] !~ /\s*(di|sub)graph\s+/);  # graph was not composite, return original input
  
  my(@graphs);
  foreach (@parts) {
    s/^;//;
    s/^\s+//;
    push(@graphs,$_."}");
  }
  return @graphs;
}

# graph_parts returns the name, nodes, edges, and global attributes
# contained in the graph that is encoded in the given text.  The nodes
# and edges are returned as references to anonymous lists of the
# objects in text format and the attribute list returned is a perl
# list of attr=val strings.  The nodes, edges, and global attributes
# remain in dot format but have trailing semicolons trimmed.  Graph
# names are returned unquoted.
#
# graph_parts: text -> graph_name x node-list-ref x edge-list-ref x attr x attr x ...
#
# e.g. ($name,$nodesref,$edgesref,@attrs)= &parse_dot::graph_parts($graph);
#
sub graph_parts {
  $_= shift;

  s/^\s*(di|sub)graph\s+("[^"]*"|[^\{\s]+)\s*\{//i;
  my($name)= $2;
  s/\}\s*$//;
  
  my(@parts)= &::split_outside_quotes(';',$_);
  @parts= grep(!/^\s*$/,@parts); # filter out empty parts

  my(@nodes,@edges,@attrs)= ();
  foreach (@parts) {
    s/^\s+//;
    s/\s+$//;
    if (/->/) { # an edge
      push(@edges,$_);
    } elsif (/\[/ || /^[^=\[]+$/) { # nodes either contain a "[" or no "="'s
      push(@nodes,$_);
    } else {
      push(@attrs,$_);
    }
  }
  return (&::dot_decode($name),\@nodes,\@edges,@attrs);
}

# node_parts returns the node name and attribute list of the node that
# is encoded in the given text.  The attribute list returned is a perl
# list of attr=val strings (possibly surrounded by whitespace).  The
# name of the node is returned without DOT-encoding.
#
# node_parts: text -> node_name x attr_text x attr_text x ...
#
# e.g. ($name,@attrs)= &parse_dot::node_parts($node);
#
sub node_parts {
  $_= shift;

  s/^\s*(\"[^\"]*\"|[^\s\[]+)\s*//;
  my($name)= $1;
  
  s/^\[//;
  s/\]\s*$//;
  my(@attrs)= $_ eq '' ? () : &::split_outside_quotes(',',$_);
  return (&::dot_decode($name),@attrs);
}

# edge_parts returns the edge source, destination and attribute list
# of the edge that is encoded in the given text.  The attribute list
# returned is a perl list of attr=val strings (possibly surrounded
# by whitespace. The name of the source and destination is returned
# with dot encoding undone.
#
# edge_parts: text -> src_name x dest_name x attr_text x attr_text x ...
#
# e.g. ($src,$dest,@attrs)= &parse_dot::edge_parts($edge);
#
sub edge_parts {
  $_= shift;
  s/^\s*(\"[^\"]*\"|[^\s-]+)\s*->\s*(\"[^\"]*\"|[^\s\[]+)//;
  my($src,$dest)= ($1,$2);
  s/\s*\[//;
  s/\]\s*$//;
  # inline of dot_decode for efficiency
  $src =~ s/^\"//;
  $src =~ s/\"$//;
  $src =~ s/\#(..)/chr(hex($1))/eg;
  $dest =~ s/^\"//;
  $dest =~ s/\"$//;
  $dest =~ s/\#(..)/chr(hex($1))/eg;
  return ($src,$dest,$_ eq '' ? () : &::split_outside_quotes(',',$_));
}

# split_attr returns the name and and value given the graph language
# encoded representation of the attribute.  The name is returned
# unquoted and the attribute value is in a string, in flattened
# format.  See the unflatted_attr routines in attr.pl to undo this.
#
# split_attr: text -> attr_name x attr_value
#
# e.g. ($name,$val)= &parse_dot::split_attr($attr);
#
sub split_attr {
  $_= shift;
  /^\s*(\"[^\"]+\"|[^\s]+)\s*=\s*(.*?)\s*$/;
  ($name,$val)= ($1,$2);
  # inline of dot_decode for efficiency
  $name =~ s/^\"//;
  $name =~ s/\"$//;
  $name =~ s/\#(..)/chr(hex($1))/eg;
  $val =~ s/^\"//;
  $val =~ s/\"$//;
  $val =~ s/\#(..)/chr(hex($1))/eg;
  return ($name,$val);
}



1;


