""" This module contains the PyGTKCodeBuffer-class. This class is a specialisation of the gtk.TextBuffer and enables syntax-highlighting for PyGTK's TextView-widget. To use the syntax-highlighting feature you have load a syntax-definition or specify your own. To load one please read the docs for the SyntaxLoader() class. """ # This library is free software: you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU Lesser General Public License for more details. # # You should have received a copy of the GNU Lesser General Public License # along with this program. If not, see . import gtk import pango import re import sys import os.path import xml.sax import imp from xml.sax.handler import ContentHandler from xml.sax.saxutils import unescape __version__ = "1.0RC2" __author__ = "Hannes Matuschek " # defined the default styles DEFAULT_STYLES = { 'DEFAULT': {'font': 'monospace'}, 'comment': {'foreground': '#0000FF'}, 'preprocessor': {'foreground': '#A020F0', 'weight': pango.WEIGHT_BOLD}, 'keyword': {'foreground': '#A52A2A', 'weight': pango.WEIGHT_BOLD}, 'special': {'foreground': '#006600'}, 'mark1': {'foreground': '#008B8B'}, 'mark2': {'foreground': '#6A5ACD'}, 'string': {'foreground': '#CC00CC'}, 'number': {'foreground': '#CC00CC'}, 'datatype': {'foreground': '#2E8B57', 'weight': pango.WEIGHT_BOLD}, 'function': {'foreground': '#008A8C'}, 'link': {'foreground': '#0000FF', 'underline': pango.UNDERLINE_SINGLE}} def _main_is_frozen(): """ Internal used function. """ return (hasattr(sys, "frozen") or # new py2exe hasattr(sys, "importers") # old py2exe or imp.is_frozen("__main__")) # tools/freeze if _main_is_frozen(): this_module_path = os.path.dirname(sys.executable) else: this_module_path = os.path.abspath(os.path.dirname(__file__)) # defines default-search paths for syntax-files SYNTAX_PATH = [ os.path.join('.', 'syntax'), this_module_path, os.path.join(os.path.expanduser('~'),".pygtkcodebuffer"), os.path.join(sys.prefix,"share","pygtkcodebuffer","syntax")] # enable/disable debug-messages DEBUG_FLAG = False # # Some log functions... # (internal used) def _log_debug(msg): if not DEBUG_FLAG: return sys.stderr.write("DEBUG: ") sys.stderr.write(msg) sys.stderr.write("\n") def _log_warn(msg): sys.stderr.write("WARN: ") sys.stderr.write(msg) sys.stderr.write("\n") def _log_error(msg): sys.stderr.write("ERROR: ") sys.stderr.write(msg) sys.stderr.write("\n") def add_syntax_path(path_or_list): """ This function adds one (string) or many (list of strings) paths to the global search-paths for syntax-files. """ global SYNTAX_PATH # handle list of strings if isinstance(path_or_list, (list, tuple)): for i in range(len(path_or_list)): SYNTAX_PATH.insert(0, path_or_list[-i]) # handle single string elif isinstance(path_or_list, basestring): SYNTAX_PATH.insert(0, path_or_list) # handle attr-error else: raise TypeError, "Argument must be path-string or list of strings" class Pattern: """ More or less internal used class representing a pattern. You may use this class to "hard-code" your syntax-definition. """ def __init__(self, regexp, style="DEFAULT", group=0, flags=""): """ The constructor takes at least on argument: the regular-expression. The optional kwarg style defines the style applied to the string matched by the regexp. The kwarg group may be used to define which group of the regular expression will be used for highlighting (Note: This means that only the selected group will be highlighted but the complete pattern must match!) The optional kwarg flags specifies flags for the regular expression. Look at the Python lib-ref for a list of flags and there meaning.""" # assemble re-flag flags += "ML"; flag = 0 _log_debug("init rule %s -> %s (%s)"%(regexp, style, flags)) for char in flags: if char == 'M': flag |= re.M if char == 'L': flag |= re.L if char == 'S': flag |= re.S if char == 'I': flag |= re.I if char == 'U': flag |= re.U if char == 'X': flag |= re.X # compile re try: self._regexp = re.compile(regexp, flag) except re.error, e: raise Exception("Invalid regexp \"%s\": %s"%(regexp,str(e))) self._group = group self.tag_name = style def __call__(self, txt, start, end): m = self._regexp.search(txt) if not m: return None mstart, mend = m.start(self._group), m.end(self._group) s = start.copy(); s.forward_chars(mstart) e = start.copy(); e.forward_chars(mend) return (s,e) class KeywordList(Pattern): """ This class may be used for hard-code a syntax-definition. It specifies a pattern for a keyword-list. This simplifies the definition of keyword-lists. """ def __init__(self, keywords, style="keyword", flags=""): """ The constructor takes at least on argument: A list of strings specifying the keywords to highlight. The optional kwarg style specifies the style used to highlight these keywords. The optional kwarg flags specifies the flags for the (internal generated) regular-expression. """ regexp = "(?:\W|^)(%s)\W"%("|".join(keywords),) Pattern.__init__(self, regexp, style, group=1, flags=flags) class String: """ This class may be used to hard-code a syntax-definition. It simplifies the definition of a "string". A "string" is something that consists of a start-pattern and an end-pattern. The end-pattern may be content of the string if it is escaped. """ def __init__(self, starts, ends, escape=None, style="string"): """ The constructor needs at least two arguments: The start- and end-pattern. The optional kwarg escape specifies a escape-sequence escaping the end-pattern. The optional kwarg style specifies the style used to highlight the string. """ try: self._starts = re.compile(starts) except re.error, e: raise Exception("Invalid regexp \"%s\": %s"%(regexp,str(e))) if escape: end_exp = "[^%(esc)s](?:%(esc)s%(esc)s)*%(end)s" end_exp = end_exp%{'esc':escape*2,'end':ends} else: end_exp = ends try: self._ends = re.compile(end_exp) except re.error, e: raise Exception("Invalid regexp \"%s\": %s"%(regexp,str(e))) self.tag_name = style def __call__(self, txt, start, end): start_match = self._starts.search(txt) if not start_match: return start_it = start.copy() start_it.forward_chars(start_match.start(0)) end_it = end.copy() end_match = self._ends.search(txt, start_match.end(0)-1) if end_match: end_it.set_offset(start.get_offset()+end_match.end(0)) return start_it, end_it class LanguageDefinition: """ This class is a container class for all rules (Pattern, KeywordList, ...) specifying the language. You have to used this class if you like to hard-code your syntax-definition. """ def __init__(self, rules): """ The constructor takes only one argument: A list of rules (i.e Pattern, KeywordList and String). """ self._grammar = rules self._styles = dict() def __call__(self, buf, start, end=None): # if no end given -> end of buffer if not end: end = buf.get_end_iter() mstart = mend = end mtag = None txt = buf.get_slice(start, end) # search min match for rule in self._grammar: # search pattern m = rule(txt, start, end) if not m: continue # prefer match with smallest start-iter if m[0].compare(mstart) < 0: mstart, mend = m mtag = rule.tag_name continue if m[0].compare(mstart)==0 and m[1].compare(mend)>0: mstart, mend = m mtag = rule.tag_name continue return (mstart, mend, mtag) def get_styles(self): return self._styles class SyntaxLoader(ContentHandler, LanguageDefinition): """ This class loads a syntax definition. There have to be a file named LANGUAGENAME.xml in one of the directories specified in the global path-list. You may add a directory using the add_syntax_path() function. """ # some translation-tables for the style-defs: style_weight_table = {'ultralight': pango.WEIGHT_ULTRALIGHT, 'light': pango.WEIGHT_LIGHT, 'normal': pango.WEIGHT_NORMAL, 'bold': pango.WEIGHT_BOLD, 'ultrabold': pango.WEIGHT_ULTRABOLD, 'heavy': pango.WEIGHT_HEAVY} style_variant_table = {'normal': pango.VARIANT_NORMAL, 'smallcaps': pango.VARIANT_SMALL_CAPS} style_underline_table = {'none': pango.UNDERLINE_NONE, 'single': pango.UNDERLINE_SINGLE, 'double': pango.UNDERLINE_DOUBLE} style_style_table = {'normal': pango.STYLE_NORMAL, 'oblique': pango.STYLE_OBLIQUE, 'italic': pango.STYLE_ITALIC} style_scale_table = { 'xx_small': pango.SCALE_XX_SMALL, 'x_small': pango.SCALE_X_SMALL, 'small': pango.SCALE_SMALL, 'medium': pango.SCALE_MEDIUM, 'large': pango.SCALE_LARGE, 'x_large': pango.SCALE_X_LARGE, 'xx_large': pango.SCALE_XX_LARGE, } def __init__(self, lang_name): """ The constructor takes only one argument: the language name. The constructor tries to load the syntax-definition from a syntax-file in one directory of the global path-list. An instance of this class IS a LanguageDefinition. You can pass it to the constructor of the CodeBuffer class. """ LanguageDefinition.__init__(self, []) ContentHandler.__init__(self) # search for syntax-files: fname = None for syntax_dir in SYNTAX_PATH: fname = os.path.join(syntax_dir, "%s.xml"%lang_name) if os.path.isfile(fname): break _log_debug("Loading syntaxfile %s"%fname) if not os.path.isfile(fname): raise Exception("No snytax-file for %s found!"%lang_name) xml.sax.parse(fname, self) # Dispatch start/end - document/element and chars def startDocument(self): self.__stack = [] def endDocument(self): del self.__stack def startElement(self, name, attr): self.__stack.append( (name, attr) ) if hasattr(self, "start_%s"%name): handler = getattr(self, "start_%s"%name) handler(attr) def endElement(self, name): if hasattr(self, "end_%s"%name): handler = getattr(self, "end_%s"%name) handler() del self.__stack[-1] def characters(self, txt): if not self.__stack: return name, attr = self.__stack[-1] if hasattr(self, "chars_%s"%name): handler = getattr(self, "chars_%s"%name) handler(txt) # Handle regexp-patterns def start_pattern(self, attr): self.__pattern = "" self.__group = 0 self.__flags = '' self.__style = attr['style'] if 'group' in attr.keys(): self.__group = int(attr['group']) if 'flags' in attr.keys(): self.__flags = attr['flags'] def end_pattern(self): rule = Pattern(self.__pattern, self.__style, self.__group, self.__flags) self._grammar.append(rule) del self.__pattern del self.__group del self.__flags del self.__style def chars_pattern(self, txt): self.__pattern += unescape(txt) # handle keyword-lists def start_keywordlist(self, attr): self.__style = "keyword" self.__flags = "" if 'style' in attr.keys(): self.__style = attr['style'] if 'flags' in attr.keys(): self.__flags = attr['flags'] self.__keywords = [] def end_keywordlist(self): kwlist = KeywordList(self.__keywords, self.__style, self.__flags) self._grammar.append(kwlist) del self.__keywords del self.__style del self.__flags def start_keyword(self, attr): self.__keywords.append("") def end_keyword(self): if not self.__keywords[-1]: del self.__keywords[-1] def chars_keyword(self, txt): parent,pattr = self.__stack[-2] if not parent == "keywordlist": return self.__keywords[-1] += unescape(txt) #handle String-definitions def start_string(self, attr): self.__style = "string" self.__escape = None if 'escape' in attr.keys(): self.__escape = attr['escape'] if 'style' in attr.keys(): self.__style = attr['style'] self.__start_pattern = "" self.__end_pattern = "" def end_string(self): strdef = String(self.__start_pattern, self.__end_pattern, self.__escape, self.__style) self._grammar.append(strdef) del self.__style del self.__escape del self.__start_pattern del self.__end_pattern def chars_starts(self, txt): self.__start_pattern += unescape(txt) def chars_ends(self, txt): self.__end_pattern += unescape(txt) # handle style def start_style(self, attr): self.__style_props = dict() self.__style_name = attr['name'] def end_style(self): self._styles[self.__style_name] = self.__style_props del self.__style_props del self.__style_name def start_property(self, attr): self.__style_prop_name = attr['name'] def chars_property(self, value): value.strip() # convert value if self.__style_prop_name in ['font','foreground','background',]: pass elif self.__style_prop_name == 'variant': if not value in self.style_variant_table.keys(): Exception("Unknown style-variant: %s"%value) value = self.style_variant_table[value] elif self.__style_prop_name == 'underline': if not value in self.style_underline_table.keys(): Exception("Unknown underline-style: %s"%value) value = self.style_underline_table[value] elif self.__style_prop_name == 'scale': if not value in self.style_scale_table.keys(): Exception("Unknown scale-style: %s"%value) value = self.style_scale_table[value] elif self.__style_prop_name == 'weight': if not value in self.style_weight_table.keys(): Exception("Unknown style-weight: %s"%value) value = self.style_weight_table[value] elif self.__style_prop_name == 'style': if not value in self.style_style_table[value]: Exception("Unknwon text-style: %s"%value) value = self.style_style_table[value] else: raise Exception("Unknown style-property %s"%self.__style_prop_name) # store value self.__style_props[self.__style_prop_name] = value class CodeBuffer(gtk.TextBuffer): """ This class extends the gtk.TextBuffer to support syntax-highlighting. You can use this class like a normal TextBuffer. """ def __init__(self, table=None, lang=None, styles={}): """ The constructor takes 3 optional arguments. table specifies a tag-table associated with the TextBuffer-instance. This argument will be passed directly to the constructor of the TextBuffer-class. lang specifies the language-definition. You have to load one using the SyntaxLoader-class or you may hard-code your syntax-definition using the LanguageDefinition-class. styles is a dictionary used to extend or overwrite the default styles provided by this module (DEFAULT_STYLE) and any language specific styles defined by the LanguageDefinition. """ gtk.TextBuffer.__init__(self, table) # default styles self.styles = DEFAULT_STYLES # update styles with lang-spec: if lang: self.styles.update(lang.get_styles()) # update styles with user-defined self.styles.update(styles) # create tags for name, props in self.styles.items(): style = dict(self.styles['DEFAULT']) # take default style.update(props) # and update with props self.create_tag(name, **style) # store lang-definition self._lang_def = lang self.connect_after("insert-text", self._on_insert_text) self.connect_after("delete-range", self._on_delete_range) self.connect('apply-tag', self._on_apply_tag) self._apply_tags = False def _on_apply_tag(self, buf, tag, start, end): # FIXME This is a hack! It allows apply-tag only while # _on_insert_text() and _on_delete_range() if not self._apply_tags: self.emit_stop_by_name('apply-tag') return True _log_debug("tag \"%s\" as %s"%(self.get_slice(start,end), tag.get_property("name"))) def _on_insert_text(self, buf, it, text, length): # if no syntax defined -> nop if not self._lang_def: return False it = it.copy() it.backward_chars(length) if not it.begins_tag(): it.backward_to_tag_toggle(None) _log_debug("Not tag-start -> moved iter to %i (%s)"%(it.get_offset(), it.get_char())) if it.begins_tag(self.get_tag_table().lookup("DEFAULT")): it.backward_to_tag_toggle(None) _log_debug("Iter at DEFAULT-start -> moved to %i (%s)"%(it.get_offset(), it.get_char())) self._apply_tags = True self.update_syntax(it) self._apply_tags = False def _on_delete_range(self, buf, start, end): # if no syntax defined -> nop if not self._lang_def: return False start = start.copy() if not start.begins_tag(): start.backward_to_tag_toggle(None) self._apply_tags = True self.update_syntax(start) self._apply_tags = False def update_syntax(self, start, end=None): """ More or less internal used method to update the syntax-highlighting. """ # if no lang set if not self._lang_def: return _log_debug("Update syntax from %i"%start.get_offset()) # if not end defined if not end: end = self.get_end_iter() # We do not use recursion -> long files exceed rec-limit! finished = False while not finished: # search first rule matching txt[start..end] mstart, mend, tagname = self._lang_def(self, start, end) # optimisation: if mstart-mend is allready tagged with tagname # -> finished if tagname: #if something found tag = self.get_tag_table().lookup(tagname) if mstart.begins_tag(tag) and mend.ends_tag(tag) and not mstart.equal(start): self.remove_all_tags(start,mstart) self.apply_tag_by_name("DEFAULT", start, mstart) _log_debug("Optimized: Found old tag at %i (%s)"%(mstart.get_offset(), mstart.get_char())) # finish finished = True continue # remove all tags from start..mend (mend == buffer-end if no match) self.remove_all_tags(start, mend) # make start..mstart = DEFAUL (mstart == buffer-end if no match) if not start.equal(mstart): _log_debug("Apply DEFAULT") self.apply_tag_by_name("DEFAULT", start, mstart) # nothing found -> finished if not tagname: finished = True continue # apply tag _log_debug("Apply %s"%tagname) self.apply_tag_by_name(tagname, mstart, mend) start = mend if start == end: finished = True continue def reset_language(self, lang_def): """ Reset the currently used language-definition. """ # remove all tags from complete text start = self.get_start_iter() self.remove_all_tags(start, self.get_end_iter()) # store lexer self._lang_def = lang_def # update styles from lang_def: if self._lang_def: self.update_styles(self._lang_def.get_styles()) # and ... self._apply_tags = True self.update_syntax(start) self._apply_tags = False def update_styles(self, styles): """ Update styles. This method may be used to reset any styles at runtime. """ self.styles.update(styles) table = self.get_tag_table() for name, props in styles.items(): style = self.styles['DEFAULT'] style.update(props) # if tagname is unknown: if not table.lookup(name): _log_debug("Create tag: %s (%s)"%(name, style)) self.create_tag(name, **style) else: # update tag tag = table.lookup(name) _log_debug("Update tag %s with (%s)"%(name, style)) map(lambda i: tag.set_property(i[0],i[1]), style.items())