include_once("../../php5/group.php5");
include_once("../../php5/schema_loader.php5");
include_once("../../php5/schema_discoverer.php5");
/**
* @package JLex
* @author Jonathan Dick
* @date Dec. 30, 2005
*/
/**
 * Streams an XML document through a SAX-style parser, rebuilds each entry as a
 * tree of group objects, and writes every entry back out with its fields in a
 * caller-supplied order.
 *
 * The group objects (schema and per-entry values) are provided by the included
 * group/schema classes; this class only drives them.
 */
class field_reorderer {
    /**#@+
    * @access public
    */
    /**
     * The XML parser handle while reorder_fields() is running; null otherwise.
     * @var mixed
     */
    public $parser;
    /**
     * The file pointer for writing to disk; null when no file is open.
     * @var resource
     */
    public $out;
    /**
     * Character data accumulated for the element currently being read.
     * @var string
     */
    public $cur_data;
    /**
     * The (lower-cased) XML tag marking the beginning of an entry.
     * @var string
     */
    public $head_tag;
    /**
     * Initialised to an empty array by the constructor; not otherwise used
     * inside this class.
     * @var array
     */
    public $fields;
    /**
     * Indexed first by group name, then by field (or sub-group) name; each value
     * is the intended position of that field within the group.
     *
     * @var array
     */
    public $group_field_orders;
    /**
     * Declared for documentation purposes; the handlers in this class use
     * $get_entry rather than this flag.
     * @var boolean
     */
    public $in_entry;
    /**
     * The group object that elements are currently being added to.
     * @var object
     */
    public $cur_group;
    /**
     * Name of the field element whose character data is being collected.
     * @var string
     */
    public $cur_tag;
    /**
     * Set to true once the first start $head_tag has been seen; elements are
     * ignored by startHandler() until then.
     * @var boolean
     */
    public $get_entry = false;
    /**
     * Set to true when a "<group name>_id" element is opened. It is not read
     * anywhere in this class.
     * @var boolean
     */
    public $delete_entry = false;
    /**#@-*/

    function __construct()
    {
        $this->cur_data = "";
        $this->fields = array();
    }

    /**
     * Start-tag callback for the XML parser.
     *
     * - Head tag: the current group is treated as the root of a new entry. Its
     *   sub-groups are destroyed, its parent is set to false and parsing of
     *   entry content is switched on.
     * - Any other tag is ignored until a head tag has been seen.
     * - "<current group name>_id": remembered as the current tag; its content is
     *   discarded by endHandler().
     * - A field of the current group: remembered as the current tag.
     * - A sub-group of the current group: the schema group is cloned, attached to
     *   the current group and becomes the current group.
     * - Anything else: the document does not match the schema and the script
     *   terminates.
     *
     * @param mixed  $xp      The parser handle (unused).
     * @param string $element The element name as reported by the parser.
     * @param array  $attribs The element attributes (unused).
     */
    function startHandler($xp, $element, $attribs) {
        $element = strtolower($element);

        if ($element == $this->head_tag) {
            $this->cur_group->destroy_subgroups();
            $this->cur_group->parent = false;
            $this->get_entry = true;
            return;
        }
        if (!$this->get_entry) {
            return;
        }
        if ($element == $this->cur_group->name."_id") {
            $this->delete_entry = true;
            $this->cur_tag = $element;
            return;
        }
        if (array_key_exists($element, $this->cur_group->fields)) {
            $this->cur_tag = $element;
            return;
        }
        if (!array_key_exists($element, $this->cur_group->groups)) {
            echo "ERROR: $element not in ".$this->cur_group->name." line ".xml_get_current_line_number($this->parser)."
";
            die("ERROR: XML does not match schema");
        }

        // Descend into a fresh copy of the schema's sub-group.
        $new_group = clone $this->cur_group->groups[$element];
        $this->cur_group->add_group_value($new_group);
        $this->cur_group = $new_group;
    }

    /**
     * End-tag callback for the XML parser.
     *
     * - Head tag: the entry is complete; it is rendered with
     *   produce_reordered_shoebox_entry() and written to $out followed by a
     *   newline.
     * - Name of the current group: the group is finished and its parent becomes
     *   the current group.
     * - Anything else ends a field. Unless it is the "<group name>_id" element,
     *   the collected tag/data pair is added to the current group. The collected
     *   tag and data are cleared in both cases so that the text of an id element
     *   cannot leak into the value of the following field.
     *
     * @param mixed  $xp      The parser handle (unused).
     * @param string $element The element name as reported by the parser.
     */
    function endHandler($xp, $element) {
        $element = strtolower($element);

        if ($this->head_tag == $element) {
            $entry = $this->cur_group->produce_reordered_shoebox_entry($this->group_field_orders);
            fwrite($this->out, $entry."\n");
            return;
        }
        if ($element == $this->cur_group->name) {
            // Plain assignment on purpose: binding cur_group by reference to the
            // child's parent slot would make the next assignment to cur_group
            // overwrite that child's parent.
            $this->cur_group = $this->cur_group->parent;
            return;
        }

        if ($element != $this->cur_group->name."_id") {
            $this->cur_group->add_value($this->cur_tag, $this->cur_data);
        }
        $this->cur_tag = "";
        $this->cur_data = "";
    }

    /**
     * Character-data callback for the XML parser.
     *
     * Each chunk is trimmed and, when non-empty, appended to $cur_data. Because
     * chunks are trimmed individually, whitespace at chunk boundaries is dropped.
     *
     * @param mixed  $xp   The parser handle (unused).
     * @param string $data One chunk of character data.
     */
    function cDataHandler($xp, $data) {
        $data = trim($data);
        if ($data === "") {
            return;
        }
        // NOTE(review): as written these three replacements map each character
        // to itself. They presumably once involved XML entity strings; confirm
        // the intended search/replace pairs against the original file.
        // str_replace() is used because the ereg family was removed in PHP 7.
        $data = str_replace("&", "&", $data);
        $data = str_replace("<", "<", $data);
        $data = str_replace(">", ">", $data);
        $this->cur_data .= $data;
    }

    /**
     * Prints, for $group and then for each of its not yet analysed sub-groups,
     * the list of orderable fields together with their paths.
     *
     * A field ending in "_s" is listed only when the group has no field of the
     * same name without that suffix, and the "alpha" field is never listed.
     * Sub-groups of the group are listed after its fields.
     *
     * @param object $group           The group object to describe.
     * @param array  $analyzed_groups Names of groups that have already been
     *        printed; children whose name is in this list are skipped.
     * @param mixed  $link            Not used by this function.
     * @return array $analyzed_groups extended with every group printed by this
     *         call (the guard compares child keys against recorded group names,
     *         so it assumes a child's key equals its name).
     */
    function get_group_fields($group, $analyzed_groups, $link) {
        $name = $group->name;
        $analyzed_groups[] = (string)$name;
        echo "Group: $name
";
        $cell_fields = "";
        $cell_links = " | ";
        $total = 0;

        // Build the path of this group by walking up to the root (parent === false).
        $xpath = "";
        for ($node = $group; $node; $node = $node->parent) {
            $xpath = "/".$node->name.$xpath;
        }

        foreach ($group->fields as $field => $count) {
            if (substr($field, -2) === "_s") {
                $short = substr($field, 0, -2);
                if (!array_key_exists($short, $group->fields)) {
                    $cur_xpath = $xpath."/".$field;
                    $cell_fields .= $field." ";
                    $cell_links .= "";
                    $cell_links .= "$cur_xpath ";
                    $total++;
                }
            }
            else if ($field != "alpha") {
                $cur_xpath = $xpath."/".$field;
                $cell_fields .= "$field ";
                $cell_links .= "";
                $cell_links .= "Find entries: $cur_xpath ";
                $total++;
            }
        }
        foreach ($group->groups as $group_name => $child_group) {
            $cur_xpath = $xpath."/".$group_name;
            $cell_fields .= "$group_name ";
            $cell_links .= "";
            $cell_links .= "Find entries: $cur_xpath";
            $total++;
        }
        $cell_fields .= " | ";
        $cell_links .= "";

        $table = "";
        $table .= " | ";
        $table .= $cell_fields.$cell_links."
";
        echo "Total Fields: $total
";
        echo "Provide order for $name: (one field per line)
";
        echo $table;
        echo "
";

        foreach ($group->groups as $group_name => $child_group) {
            if (!in_array((string)$group_name, $analyzed_groups, true)) {
                $analyzed_groups = $this->get_group_fields($child_group, $analyzed_groups, $link);
            }
        }
        return $analyzed_groups;
    }

    /**
     * Fills $group_field_orders from user supplied data.
     *
     * For every group name, the matching form value is read as one field name
     * per line. Names that are fields or sub-groups of the group receive
     * consecutive positions in the order given. The remaining fields follow in
     * the order the group stores them (skipping "alpha" and any "_s" field whose
     * un-suffixed counterpart exists), and the remaining sub-groups come last.
     *
     * The order of each group is rebuilt from scratch on every call.
     *
     * @param array  $group_names Names of the groups to compute an order for.
     * @param array  $form_vars   Request variables, indexed by group name.
     * @param object $group       Group object providing get_group($name).
     */
    function set_group_field_orders($group_names, $form_vars, $group) {
        foreach ($group_names as $group_name) {
            // Start from an empty map so the lookups below always receive an
            // array and positions from an earlier call cannot linger.
            $this->group_field_orders[$group_name] = array();
            $cur_index = 0;

            $raw = isset($form_vars[$group_name]) ? $form_vars[$group_name] : "";
            $textarea = trim(utf8_decode(stripslashes($raw)));
            $g = $group->get_group($group_name);

            foreach (explode("\n", $textarea) as $field) {
                $field = trim($field);
                if (array_key_exists($field, $g->fields) || array_key_exists($field, $g->groups)) {
                    $this->group_field_orders[$group_name][$field] = $cur_index;
                    $cur_index++;
                }
            }

            foreach ($g->fields as $field => $count) {
                if (array_key_exists($field, $this->group_field_orders[$group_name])) {
                    continue;
                }
                if (substr($field, -2) === "_s") {
                    $short = substr($field, 0, -2);
                    if (!array_key_exists($short, $g->fields)) {
                        $this->group_field_orders[$group_name][$field] = $cur_index;
                        $cur_index++;
                    }
                }
                else if ($field != "alpha") {
                    $this->group_field_orders[$group_name][$field] = $cur_index;
                    $cur_index++;
                }
            }

            foreach ($g->groups as $child_name => $child_group) {
                if (!array_key_exists($child_name, $this->group_field_orders[$group_name])) {
                    $this->group_field_orders[$group_name][$child_name] = $cur_index;
                    $cur_index++;
                }
            }
        }
    }

    /**
     * Parses an XML file and writes every entry to a new file with its fields
     * reordered.
     *
     * The script terminates with an "ERROR: ..." message when either file
     * cannot be opened or the XML cannot be parsed.
     *
     * @param string $xml              Path of the XML file to read.
     * @param string $new_shoebox_name Path of the file to write.
     * @param string $head_tag         Lower-cased tag that starts an entry.
     * @param array  $form_vars        Request variables holding the requested
     *        field order for each group (see set_group_field_orders()).
     * @param object $group            Root group object describing an entry.
     */
    function reorder_fields($xml, $new_shoebox_name, $head_tag, $form_vars, $group) {
        $this->cur_group = $group;
        $group_names = $this->cur_group->get_group_names();
        $this->set_group_field_orders($group_names, $form_vars, $this->cur_group);

        // Reset the per-parse state so a reused instance starts clean.
        $this->head_tag = $head_tag;
        $this->get_entry = false;
        $this->delete_entry = false;
        $this->cur_tag = "";
        $this->cur_data = "";

        // Open the input first so a missing input does not truncate the output.
        $in = fopen($xml, "r");
        if ($in === false) {
            die("ERROR: could not open the XML file for reading");
        }
        $this->out = fopen($new_shoebox_name, "w");
        if ($this->out === false) {
            fclose($in);
            $this->out = null;
            die("ERROR: could not open the new shoebox file for writing");
        }

        $this->parser = xml_parser_create();
        xml_set_element_handler($this->parser, array($this, "startHandler"), array($this, "endHandler"));
        xml_set_character_data_handler($this->parser, array($this, "cDataHandler"));

        $error = "";
        // Compare against false explicitly: a line such as "0" is falsy.
        while (($line = fgets($in)) !== false) {
            if (!xml_parse($this->parser, $line, false)) {
                $error = xml_error_string(xml_get_error_code($this->parser))
                    ." at line ".xml_get_current_line_number($this->parser);
                break;
            }
        }
        // Tell the parser the document is complete so trailing errors surface.
        if ($error === "" && !xml_parse($this->parser, "", true)) {
            $error = xml_error_string(xml_get_error_code($this->parser))
                ." at line ".xml_get_current_line_number($this->parser);
        }

        fclose($in);
        fclose($this->out);
        xml_parser_free($this->parser);
        // Drop the handles so the object can be serialized (e.g. into a session).
        $this->parser = null;
        $this->out = null;

        if ($error !== "") {
            die("ERROR: XML parse error: ".$error);
        }
    }
}
// Request handler. Step 1 ("get_fields") discovers the schema of the chosen XML
// file and stores the prepared field_reorderer in the session; step 2
// ("reorder_fields") uses the stored object to write the reordered file.
session_start();
error_reporting(0);
set_time_limit(0);
// GET values take precedence over POST values with the same name.
$form_vars = array_merge($_POST, $_GET);
$function = isset($form_vars["function"]) ? $form_vars["function"] : "";
if ($function == "get_fields") {
    $fr = new field_reorderer();
    // The file names come from the request: keep only the final path component
    // so they cannot point outside the project_xml_files directory.
    $xml_name = isset($form_vars["xml_file"]) ? basename($form_vars["xml_file"]) : "";
    $xml_file = "../../project_xml_files/".$xml_name;
    $_SESSION["xml_file"] = $xml_file;
    $head_tag = strtolower(isset($form_vars["head_tag"]) ? $form_vars["head_tag"] : "");
    $_SESSION["head_tag"] = $head_tag;
    $new_shoebox_name = isset($form_vars["new_shoebox_name"]) ? basename($form_vars["new_shoebox_name"]) : "";
    $_SESSION["new_shoebox_name"] = "../../project_xml_files/".$new_shoebox_name;
    $id_field = strtolower(isset($form_vars["id_field"]) ? $form_vars["id_field"] : "");
    $sd = new schema_discoverer();
    $schema = $sd->create_schema($xml_file, $head_tag);
    $sl = new schema_loader();
    // NOTE(review): $xml is never assigned in this script, so the second
    // argument is null here; confirm what create_group_structure() expects.
    $cur_group = $sl->create_group_structure($schema, $xml);
    $fr->cur_group = $cur_group;
    $analyzed_groups = array();
    $_SESSION["fr"] = $fr;
    echo "";
    echo "";
    echo "
";
    echo "";
}
else if ($function == "reorder_fields") {
    // Step 2 only works after step 1 has populated the session.
    if (!isset($_SESSION["fr"], $_SESSION["xml_file"], $_SESSION["new_shoebox_name"], $_SESSION["head_tag"])) {
        die("ERROR: no field information found in the session; run get_fields first");
    }
    $xml_file = $_SESSION["xml_file"];
    $new_shoebox_name = $_SESSION["new_shoebox_name"];
    $head_tag = $_SESSION["head_tag"];
    $fr = $_SESSION["fr"];
    $fr->reorder_fields($xml_file, $new_shoebox_name, $head_tag, $form_vars, $fr->cur_group);
    echo "Thank you!
";
    // The name originated from the request, so escape it before printing.
    echo "The new version of your shoebox is located at /jlex/nahuatl/php5/".htmlspecialchars($new_shoebox_name, ENT_QUOTES, "UTF-8");
}
?>