265 lines
9.5 KiB
PHP
265 lines
9.5 KiB
PHP
<?php
|
|
require_once ('global.php');
|
|
require_once (CRAYON_LANGS_PHP);
|
|
|
|
/* Manages parsing the syntax for any given language, constructing the regex, and validating the
|
|
elements. */
|
|
class CrayonParser {
|
|
// Properties and Constants ===============================================
|
|
const CASE_INSENSITIVE = 'CASE_INSENSITIVE';
|
|
const MULTI_LINE = 'MULTI_LINE';
|
|
const SINGLE_LINE = 'SINGLE_LINE';
|
|
const ALLOW_MIXED = 'ALLOW_MIXED';
|
|
//const NO_END_TAG = '(?![^<]*>)'; // No longer used
|
|
const HTML_CHAR = 'HTML_CHAR';
|
|
const HTML_CHAR_REGEX = '<|>|(&([\w-]+);?)|[ \t]+';
|
|
const CRAYON_ELEMENT = 'CRAYON_ELEMENT';
|
|
const CRAYON_ELEMENT_REGEX = '\{\{crayon-internal:[^\}]*\}\}';
|
|
const CRAYON_ELEMENT_REGEX_CAPTURE = '\{\{crayon-internal:([^\}]*)\}\}';
|
|
|
|
private static $modes = array(self::CASE_INSENSITIVE => TRUE, self::MULTI_LINE => TRUE, self::SINGLE_LINE => TRUE, self::ALLOW_MIXED => TRUE);
|
|
|
|
// Methods ================================================================
|
|
private function __construct() {}
|
|
|
|
/**
|
|
* Parse all languages stored in CrayonLangs.
|
|
* Avoid using this unless you must list the details in language files for all languages.
|
|
* @return array Array of all loaded CrayonLangs.
|
|
*/
|
|
public static function parse_all() {
|
|
$langs = CrayonResources::langs()->get();
|
|
if (empty($langs)) {
|
|
return FALSE;
|
|
}
|
|
foreach ($langs as $lang) {
|
|
self::parse($lang->id());
|
|
}
|
|
return $langs;
|
|
}
|
|
|
|
/* Read a syntax file and parse the regex rules within it, this may require several other
|
|
files containing lists of keywords and such to be read. Updates the parsed elements and
|
|
regex in the CrayonLang with the given $id. */
|
|
public static function parse($id) {
|
|
// Verify the language is loaded and has not been parsed before
|
|
if ( !($lang = CrayonResources::langs()->get($id)) ) {
|
|
CrayonLog::syslog("The language with id '$id' was not loaded and could not be parsed.");
|
|
return FALSE;
|
|
} else if ($lang->is_parsed()) {
|
|
return;
|
|
}
|
|
// Read language file
|
|
$path = CrayonResources::langs()->path($id);
|
|
CrayonLog::debug('Parsing language ' . $path);
|
|
if ( ($file = CrayonUtil::lines($path, 'wcs')) === FALSE ) {
|
|
CrayonLog::debug('Parsing failed ' . $path);
|
|
return FALSE;
|
|
}
|
|
|
|
// Extract the language name
|
|
$name_pattern = '#^[ \t]*name[ \t]+([^\r\n]+)[ \t]*#mi';
|
|
preg_match($name_pattern, $file, $name);
|
|
if (count($name) > 1) {
|
|
$name = $name[1];
|
|
$lang->name($name);
|
|
$file = preg_replace($name_pattern, '', $file);
|
|
} else {
|
|
$name = $lang->id();
|
|
}
|
|
|
|
// Extract the language version
|
|
$version_pattern = '#^[ \t]*version[ \t]+([^\r\n]+)[ \t]*#mi';
|
|
preg_match($version_pattern, $file, $version);
|
|
if (count($version) > 1) {
|
|
$version = $version[1];
|
|
$lang->version($version);
|
|
$file = preg_replace($version_pattern, '', $file);
|
|
}
|
|
|
|
// Extract the modes
|
|
$mode_pattern = '#^[ \t]*(' . implode('|', array_keys(self::$modes)) . ')[ \t]+(?:=[ \t]*)?([^\r\n]+)[ \t]*#mi';
|
|
preg_match_all($mode_pattern, $file, $mode_matches);
|
|
if (count($mode_matches) == 3) {
|
|
for ($i = 0; $i < count($mode_matches[0]); $i++) {
|
|
$lang->mode($mode_matches[1][$i], $mode_matches[2][$i]);
|
|
}
|
|
$file = preg_replace($mode_pattern, '', $file);
|
|
}
|
|
|
|
/* Add reserved Crayon element. This is used by Crayon internally. */
|
|
$crayon_element = new CrayonElement(self::CRAYON_ELEMENT, $path, self::CRAYON_ELEMENT_REGEX);
|
|
$lang->element(self::CRAYON_ELEMENT, $crayon_element);
|
|
|
|
// Extract elements, classes and regex
|
|
$pattern = '#^[ \t]*([\w:]+)[ \t]+(?:\[([\w\t ]*)\][ \t]+)?([^\r\n]+)[ \t]*#m';
|
|
preg_match_all($pattern, $file, $matches);
|
|
|
|
if (!empty($matches[0])) {
|
|
$elements = $matches[1];
|
|
$classes = $matches[2];
|
|
$regexes = $matches[3];
|
|
} else {
|
|
CrayonLog::syslog("No regex patterns and/or elements were parsed from language file at '$path'.");
|
|
}
|
|
|
|
// Remember state in case we encounter catchable exceptions
|
|
$error = FALSE;
|
|
for ($i = 0; $i < count($matches[0]); $i++) {
|
|
// References
|
|
$name = &$elements[$i];
|
|
$class = &$classes[$i];
|
|
$regex = &$regexes[$i];
|
|
$name = trim(strtoupper($name));
|
|
// Ensure both the element and regex are valid
|
|
if (empty($name) || empty($regex)) {
|
|
CrayonLog::syslog("Element(s) and/or regex(es) are missing in '$path'.");
|
|
$error = TRUE;
|
|
continue;
|
|
}
|
|
// Look for fallback element
|
|
$pieces = explode(':', $name);
|
|
if (count($pieces) == 2) {
|
|
$name = $pieces[0];
|
|
$fallback = $pieces[1];
|
|
} else if (count($pieces) == 1) {
|
|
$name = $pieces[0];
|
|
$fallback = '';
|
|
} else {
|
|
CrayonLog::syslog("Too many colons found in element name '$name' in '$path'");
|
|
$error = TRUE;
|
|
continue;
|
|
}
|
|
// Create a new CrayonElement
|
|
$element = new CrayonElement($name, $path);
|
|
$element->fallback($fallback);
|
|
if (!empty($class)) {
|
|
// Avoid setting known css to blank
|
|
$element->css($class);
|
|
}
|
|
if ($element->regex($regex) === FALSE) {
|
|
$error = TRUE;
|
|
continue;
|
|
}
|
|
// Add the regex to the element
|
|
$lang->element($name, $element);
|
|
$state = $error ? CrayonLang::PARSED_ERRORS : CrayonLang::PARSED_SUCCESS;
|
|
$lang->state($state);
|
|
}
|
|
|
|
/* Prevents < > and other html entities from being printed as is, which could lead to actual html tags
|
|
* from the printed code appearing on the page - not good. This can also act to color any HTML entities
|
|
* that are not picked up by previously defined elements.
|
|
*/
|
|
$html = new CrayonElement(self::HTML_CHAR, $path, self::HTML_CHAR_REGEX);
|
|
$lang->element(self::HTML_CHAR, $html);
|
|
}
|
|
|
|
// Validates regex and accesses data stored in a CrayonElement
|
|
public static function validate_regex($regex, $element) {
|
|
if (is_string($regex) && @get_class($element) == CRAYON_ELEMENT_CLASS) {
|
|
// If the (?alt) tag has been used, insert the file into the regex
|
|
$file = self::regex_match('#\(\?alt:(.+?)\)#', $regex);
|
|
if ( count($file) == 2 ) {
|
|
// Element 0 has full match, 1 has captured groups
|
|
for ($i = 0; $i < count($file[1]); $i++) {
|
|
$file_lines = CrayonUtil::lines(dirname($element->path()) . crayon_s() . $file[1][$i], 'rcwh');
|
|
if ($file_lines !== FALSE) {
|
|
$file_lines = implode('|', $file_lines);
|
|
// If any spaces exist, treat them as whitespace
|
|
$file_lines = preg_replace('#[ \t]+#msi', '\s+', $file_lines);
|
|
$regex = str_replace($file[0][$i], "(?:$file_lines)", $regex);
|
|
} else {
|
|
CrayonLog::syslog("Parsing of '{$element->path()}' failed, an (?alt) tag failed for the element '{$element->name()}'" );
|
|
return FALSE;
|
|
}
|
|
}
|
|
}
|
|
|
|
// If the (?default:element) function is used, replace the regex with the default, if exists
|
|
$def = self::regex_match('#\(\?default(?:\:(\w+))?\)#', $regex);
|
|
if ( count($def) == 2 ) {
|
|
// Load default language
|
|
$default = CrayonResources::langs()->get(CrayonLangs::DEFAULT_LANG);
|
|
// If default has not been loaded, we can't use it, skip the element
|
|
if (!$default) {
|
|
CrayonLog::syslog(
|
|
"Could not use default regex in the element '{$element->name()}' in '{$element->path()}'");
|
|
return FALSE;
|
|
}
|
|
for ($i = 0; $i < count($def[1]); $i++) {
|
|
// If an element has been provided
|
|
$element_name = ( !empty($def[1][$i]) ) ? $def[1][$i] : $element->name();
|
|
if (($default_element = $default->element($element_name)) != FALSE) {
|
|
$regex = str_replace($def[0][$i], '(?:' . $default_element->regex() .')', $regex);
|
|
} else {
|
|
CrayonLog::syslog("The language at '{$element->path()}' referred to the Default Language regex for element '{$element->name()}', which did not exist.");
|
|
if (CRAYON_DEBUG) {
|
|
CrayonLog::syslog("Default language URL: " . CrayonResources::langs()->url(CrayonLangs::DEFAULT_LANG));
|
|
CrayonLog::syslog("Default language Path: " . CrayonResources::langs()->path(CrayonLangs::DEFAULT_LANG));
|
|
}
|
|
return FALSE;
|
|
}
|
|
}
|
|
}
|
|
|
|
// If the (?html) tag is used, escape characters in html (<, > and &)
|
|
$html = self::regex_match('#\(\?html:(.+?)\)#', $regex);
|
|
if ( count($html) == 2 ) {
|
|
for ($i = 0; $i < count($html[1]); $i++) {
|
|
$regex = str_replace($html[0][$i], htmlentities($html[1][$i]), $regex);
|
|
}
|
|
}
|
|
|
|
// Ensure all parenthesis are atomic to avoid conflicting with element matches
|
|
$regex = CrayonUtil::esc_atomic($regex);
|
|
|
|
// Escape #, this is our delimiter
|
|
$regex = CrayonUtil::esc_hash($regex);
|
|
|
|
// Test if regex is valid
|
|
if (@preg_match("#$regex#", '') === FALSE) {
|
|
CrayonLog::syslog("The regex for the element '{$element->name()}' in '{$element->path()}' is not valid.");
|
|
return FALSE;
|
|
}
|
|
|
|
return $regex;
|
|
} else {
|
|
return '';
|
|
}
|
|
}
|
|
|
|
public static function validate_css($css) {
|
|
if (is_string($css)) {
|
|
// Remove dots in CSS class and convert to lowercase
|
|
$css = str_replace('.', '', $css);
|
|
$css = strtolower($css);
|
|
$css = explode(' ', $css);
|
|
$css_str = '';
|
|
foreach ($css as $c) {
|
|
if (!empty($c)) {
|
|
$css_str .= $c . ' ';
|
|
}
|
|
}
|
|
return trim($css_str);
|
|
} else {
|
|
return '';
|
|
}
|
|
}
|
|
|
|
public static function regex_match($pattern, $subject) {
|
|
if (preg_match_all($pattern, $subject, $matches)) {
|
|
return $matches;
|
|
}
|
|
return array();
|
|
}
|
|
|
|
public static function modes() {
|
|
return self::$modes;
|
|
}
|
|
|
|
public static function is_mode($name) {
|
|
return is_string($name) && array_key_exists($name, self::$modes);
|
|
}
|
|
}
|
|
?>
|