cur_group is initially set to false. Technically, a group will be created representing
//the entire xml document. However, as of this version, we allow the xml document owner the liberty
//of choosing what tags enclose the set of entries the document contains. Consequently, we ask for the
//tag name enclosing an individual entry. The schema that is returned will represent only this
//group, not the entire xml document.

The group object is a representation of an xml group. For our purposes, we track the maximum
number of times a particular field exists within a set of entries. For more information as to
why, please see the documentation for group.php5.

The method for determining the maximum number of fields within a given group is as follows.
For each group, count the number of times a particular field exists. Once the end of the group
is reached, for each field determine first if it exists in the group template. If not, add it
to the group template. If so, check whether the number of times the field occurs in the current
entry exceeds the count stored in the template group. If it does, set the template group to
that number of fields.
*/

include_once("group.php5");

/**
 * Walks an XML document with the expat-based parser and records, for every
 * group (an element that contains child elements), the maximum number of
 * times each leaf field occurs inside a single instance of that group.
 *
 * Relies on the `group` class from group.php5; from the usage below that
 * class is expected to expose `name`, `parent`, `fields`, `groups`,
 * `add_group()` and `structure_to_xml()`.
 */
class schema_discoverer
{
    /** XML parser handle created in the constructor and freed by create_schema(). */
    public $parser;

    /** Group currently being populated; false until the root element is seen. */
    public $cur_group = false;

    public $ids = array();

    /** Per-group field counters for the instances still open: group name => (field => count). */
    public $cur_fieldsets = array();

    public $head_tag;

    /** Names (lower-cased) of the elements that are currently open, outermost first. */
    public $tag_stack = array();

    /** Field templates already collected, keyed by group name. */
    public $existing_groups = array();

    /**
     * Creates the parser and registers this object's methods as handlers.
     */
    public function __construct()
    {
        $this->parser = xml_parser_create();
        // Array callables bind the handlers to this object directly, so the
        // deprecated xml_set_object() + string method name form is not needed.
        xml_set_element_handler(
            $this->parser,
            array($this, "startHandler"),
            array($this, "endHandler")
        );
        xml_set_character_data_handler($this->parser, array($this, "cDataHandler"));
    }

    /**
     * Creates a group and seeds it with the field template already collected
     * for groups of the same name, if any.
     *
     * @param string      $name   lower-cased tag name of the group
     * @param group|false $parent enclosing group, or false for the root
     * @return group
     */
    private function build_group($name, $parent)
    {
        $new_group = new group($name, $parent);
        if (array_key_exists($name, $this->existing_groups)) {
            $new_group->fields = $this->existing_groups[$name];
        }
        return $new_group;
    }

    /**
     * Start-tag handler.
     *
     * An element is only known to be a group once one of its children opens,
     * so (apart from the root) a group is entered lazily: when a start tag
     * arrives and the innermost open element is not the current group, that
     * open element becomes the current group.
     *
     * @param mixed  $xp      parser handle (unused)
     * @param string $element tag name as reported by the parser
     * @param array  $attribs attributes (unused)
     */
    public function startHandler($xp, $element, $attribs)
    {
        $element = strtolower($element);
        $last_tag = end($this->tag_stack);

        if ($this->cur_group === false) {
            // First element of the document: create the group that represents
            // the entire xml document.
            $this->cur_group = $this->build_group($element, false);
        } else if ($last_tag !== $this->cur_group->name) {
            $parent = $this->cur_group;
            if (array_key_exists($last_tag, $parent->groups)) {
                // A group of this name was already seen under the same parent.
                $this->cur_group = $parent->groups[$last_tag];
            } else {
                // The template is looked up under the new group's own name
                // ($last_tag); the original read it under $element instead.
                $new_group = $this->build_group($last_tag, $parent);
                $parent->add_group($new_group);
                $this->cur_group = $new_group;
            }
        }

        $this->tag_stack[] = $element;
    }

    /**
     * End-tag handler.
     *
     * A group may contain fields followed by subgroups followed by more
     * fields, so the per-instance counts are kept in cur_fieldsets, indexed
     * by group name, and are only folded into the group's template when the
     * group's own end tag is reached.
     *
     * @param mixed  $xp      parser handle (unused)
     * @param string $element tag name as reported by the parser
     */
    public function endHandler($xp, $element)
    {
        $element = strtolower($element);
        $name = $this->cur_group->name;

        if ($element == $name) {
            if (array_key_exists($name, $this->cur_fieldsets)) {
                // Keep, per field, the largest count seen in any instance.
                foreach ($this->cur_fieldsets[$name] as $field => $count) {
                    if (!array_key_exists($field, $this->cur_group->fields)
                        || $this->cur_group->fields[$field] < $count) {
                        $this->cur_group->fields[$field] = $count;
                    }
                }
                $this->existing_groups[$element] = $this->cur_group->fields;
                unset($this->cur_fieldsets[$name]);
            }

            // Step back to the enclosing group even when this instance had no
            // direct fields (only subgroups); otherwise the following tags
            // would be attached to the wrong group. The root has no parent,
            // so it stays current and create_schema() can read its groups.
            $child = $this->cur_group;
            if (is_object($child->parent)) {
                $this->cur_group = $child->parent;
                $this->cur_group->add_group($child);
            }
        } else {
            // A leaf element closed: count it as a field of the current group.
            if (!array_key_exists($name, $this->cur_fieldsets)) {
                $this->cur_fieldsets[$name] = array();
            }
            if (array_key_exists($element, $this->cur_fieldsets[$name])) {
                $this->cur_fieldsets[$name][$element]++;
            } else {
                $this->cur_fieldsets[$name][$element] = 1;
            }
        }

        array_pop($this->tag_stack);
    }

    /**
     * Character-data handler; text content is irrelevant to the schema.
     */
    public function cDataHandler($xp, $data)
    {
    }

    /**
     * Parses $xml_file and returns the schema of the group named $head_tag.
     *
     * The parser is freed at the end, so this method is meant to be called
     * once per instance.
     *
     * @param string $xml_file path of the xml document to analyse
     * @param string $head_tag tag name enclosing one entry; it must be a
     *                         direct child group of the root element
     * @return string "\n" followed by the group's structure_to_xml() output,
     *                or "" when the file cannot be read or the head tag is
     *                not found (an error line is echoed in both cases)
     */
    public function create_schema($xml_file, $head_tag)
    {
        set_time_limit(1000);

        $in = fopen($xml_file, "r");
        if ($in === false) {
            echo "ERROR: could not open " . $xml_file . "\n";
            return "";
        }

        // Matches an ampersand that does not start a character or entity reference.
        // NOTE(review): the original call was ereg_replace("&","&",$line), a
        // no-op using a function removed in PHP 7; it presumably was meant to
        // escape raw ampersands. Confirm this is the intended behaviour.
        $bare_ampersand = '/&(?!(?:#[0-9]+|#[xX][0-9A-Fa-f]+|[A-Za-z_:][A-Za-z0-9_.:-]*);)/';

        // Compare against false explicitly so a line consisting of "0" does
        // not end the loop early.
        while (($line = fgets($in)) !== false) {
            $chunk = preg_replace($bare_ampersand, "&amp;", $line);
            if ($chunk === null) {
                $chunk = $line;
            }
            if (!xml_parse($this->parser, $chunk, false)) {
                fclose($in);
                die("BAD PARSE: " . xml_get_current_line_number($this->parser) . "\n");
            }
        }
        fclose($in);

        // Tell the parser the input is complete so that a truncated document
        // is reported instead of silently producing a partial schema.
        if (!xml_parse($this->parser, "", true)) {
            die("BAD PARSE: " . xml_get_current_line_number($this->parser) . "\n");
        }
        xml_parser_free($this->parser);

        // Group names are stored lower-cased by the handlers.
        $wanted = strtolower($head_tag);
        $head = null;
        if (is_object($this->cur_group)) {
            foreach ($this->cur_group->groups as $group) {
                if ($group->name === $wanted) {
                    $head = $group;
                    break;
                }
            }
        }

        if ($head === null) {
            echo "ERROR: records must be children of the root node\n";
            return "";
        }

        return "\n" . $head->structure_to_xml();
    }
}

/*
$sd = new schema_discoverer();
$schema = $sd->create_schema("MaaDict.xml","lxgroup");
echo $schema;
*/
?>