diff --git a/bin/MultiMarkdown.pl b/bin/MultiMarkdown.pl index 8f36adf..a7bf049 100755 --- a/bin/MultiMarkdown.pl +++ b/bin/MultiMarkdown.pl @@ -1,138 +1,26 @@ #!/usr/bin/env perl -# MultiMarkdown -- A modification of John Gruber's original Markdown -# that adds new features and an output format that can more readily -# be converted into other document formats -# -# $Id: MultiMarkdown.pl 525 2009-06-15 18:45:44Z fletcher $ -# -# Original Code Copyright (c) 2004-2007 John Gruber -# -# -# MultiMarkdown changes Copyright (c) 2005-2009 Fletcher T. Penney -# -# -# MultiMarkdown Version 2.0.b6 -# -# Based on Markdown.pl 1.0.2b8 - Wed 09 May 2007 -# -# -# TODO: Change math mode delimiter? -# TODO: Still need to get the glossary working in non-memoir documents -# TODO: A mechanism to include arbitrary code (LaTeX, etc) without being "ugly" -# TODO: Look into discussion re: assigning classes to div's/span's on Markdown list. -# TODO: Improve support for tables with long items and overall width in LaTeX -# TODO: Need a syntax for long table cells in MMD, even if no rowspan feature yet -# TODO: Create utilities to convert MMD tables to/from tab-delimited - - -package MultiMarkdown; require 5.006_000; use strict; use warnings; use File::Basename; +use File::Spec; -eval {require MT}; # Test to see if we're running in MT. -unless ($@) { - require Text::ASCIIMathML; -} else { # Otherwise look for ASCIIMathML.pm next to MultiMarkdown.pl - my $me = $0; # Where am I? - - # Am I running in Windoze? - my $os = $^O; - - # Get just the directory portion - if ($os =~ /MSWin/) { - $me = dirname($me) . "\\"; - } else { - $me = dirname($me) . 
"/"; - } - require $me ."ASCIIMathML.pm"; -} - +eval {require MultiMarkdown}; +if ($@) { + my $me = readlink(__FILE__) || __FILE__; -use Digest::MD5 qw(md5_hex); -use vars qw($VERSION $g_use_metadata $g_base_url - $g_bibliography_title $g_allow_mathml $g_base_header_level $mathParser); -$VERSION = '2.0.b6'; - -$mathParser = new Text::ASCIIMathML(); - -## Disabled; causes problems under Perl 5.6.1: -# use utf8; -# binmode( STDOUT, ":utf8" ); # c.f.: http://acis.openlib.org/dev/perl-unicode-struggle.html - -# -# Global default settings: -# -my $g_empty_element_suffix = " />"; # Change to ">" for HTML output -my $g_tab_width = 4; -my $g_allow_mathml = 1; -my $g_base_header_level = 1; - -# -# Globals: -# - -# Reusable patterns to match balanced [brackets] and (parens). See -# Friedl's "Mastering Regular Expressions", 2nd Ed., pp. 328-331. -my ($g_nested_brackets, $g_nested_parens); -$g_nested_brackets = qr{ - (?> # Atomic matching - [^\[\]]+ # Anything other than brackets - | - \[ - (??{ $g_nested_brackets }) # Recursive set of nested brackets - \] - )* -}x; - -# Doesn't allow for whitespace, because we're using it to match URLs: -$g_nested_parens = qr{ - (?> # Atomic matching - [^()\s]+ # Anything other than parens or whitespace - | - \( - (??{ $g_nested_parens }) # Recursive set of nested brackets - \) - )* -}x; - - -# Table of hash values for escaped characters: -my %g_escape_table; -foreach my $char (split //, '\\`*_{}[]()>#+-.!') { - $g_escape_table{$char} = md5_hex($char); -} + my $path = dirname(dirname($me)); + $path = File::Spec->join($path, 'lib'); -# Global hashes, used by various utility routines -my %g_urls = (); -my %g_titles= (); -my %g_html_blocks = (); -my %g_metadata = (); -my %g_metadata_newline = (); -my %g_crossrefs = (); -my %g_footnotes = (); -my %g_attributes = (); -my @g_used_footnotes = (); -my $g_footnote_counter = 0; + unshift (@INC, $path); -my $g_citation_counter = 0; -my @g_used_references = (); -my %g_references = (); 
-$g_bibliography_title = "Bibliography"; - -$g_use_metadata = 1; -$g_metadata_newline{default} = "\n"; -$g_metadata_newline{keywords} = ", "; -my $g_document_format = ""; - -# Used to track when we're inside an ordered or unordered list -# (see _ProcessListItems() for details): -my $g_list_level = 0; + require MultiMarkdown; +} +import MultiMarkdown qw{markdown}; #### Blosxom plug-in interface ########################################## @@ -140,7 +28,7 @@ package MultiMarkdown; # which posts Markdown should process, using a "meta-markup: markdown" # header. If it's set to 0 (the default), Markdown will process all # entries. -my $g_blosxom_use_meta = 0; +our $g_blosxom_use_meta = 0; sub start { 1; } sub story { @@ -149,7 +37,7 @@ sub story { if ( (! $g_blosxom_use_meta) or (defined($meta::markup) and ($meta::markup =~ /^\s*markdown\s*$/i)) ){ - $$body_ref = Markdown($$body_ref); + $$body_ref = markdown($$body_ref); } 1; } @@ -197,10 +85,11 @@ sub story { my $text = shift; my $ctx = shift; my $raw = 0; + my %opts = (); if (defined $ctx) { my $output = $ctx->stash('multimarkdown_output'); if (defined $output && $output =~ m/^html/i) { - $g_empty_element_suffix = ">"; + $opts{empty_element_suffix} = ">"; $ctx->stash('multimarkdown_output', ''); } elsif (defined $output && $output eq 'raw') { @@ -209,10 +98,10 @@ sub story { } else { $raw = 0; - $g_empty_element_suffix = " />"; + $opts{empty_element_suffix} = " />"; } } - $text = $raw ? $text : Markdown($text); + $text = $raw ? 
$text : markdown($text, %opts); $text; }, }); @@ -232,16 +121,17 @@ sub story { on_format => sub { my $text = shift; my $ctx = shift; + my %opts = (); if (defined $ctx) { my $output = $ctx->stash('multimarkdown_output'); if (defined $output && $output eq 'html') { - $g_empty_element_suffix = ">"; + $opts{empty_element_suffix} = ">"; } else { - $g_empty_element_suffix = " />"; + $opts{empty_element_suffix} = " />"; } } - $text = Markdown($text); + $text = markdown($text, %opts); $text = $smartypants->($text, '1'); }, }); @@ -256,6 +146,8 @@ sub story { unless ( defined($blosxom::version) ) { use warnings; + my %opts = (); + #### Check for command-line switches: ################# my %cli_opts; use Getopt::Long; @@ -264,9 +156,24 @@ sub story { 'version', 'shortversion', 'html4tags', + 'markdownonly', ); + + if ($cli_opts{'markdownonly'} || basename($0) eq 'Markdown.pl') { + %opts = ( + allow_mathml => 0, + use_metadata => 0, + heading_ids => 0, + img_ids => 0, + codeblocks_newline => "\n", + disable_tables => 1, + disable_footnotes =>1, + disable_bibliography =>1, + ); + } + if ($cli_opts{'version'}) { # Version info - print "\nThis is MultiMarkdown, version $VERSION.\n"; + print "\nThis is MultiMarkdown, version $MultiMarkdown::VERSION.\n"; print "Original code Copyright 2004 John Gruber\n"; print "MultiMarkdown changes Copyright 2005-2009 Fletcher Penney\n"; print "http://fletcherpenney.net/multimarkdown/\n"; @@ -274,2297 +181,25 @@ sub story { exit 0; } if ($cli_opts{'shortversion'}) { # Just the version number string. - print $VERSION; + print $MultiMarkdown::VERSION; exit 0; } if ($cli_opts{'html4tags'}) { # Use HTML tag style instead of XHTML - $g_empty_element_suffix = ">"; + $opts{empty_element_suffix} = ">"; } - #### Process incoming text: ########################### my $text; { local $/; # Slurp the whole file $text = <>; } - print Markdown($text); - } -} - - - -sub Markdown { -# -# Main function. 
The order in which other subs are called here is -# essential. Link and image substitutions need to happen before -# _EscapeSpecialCharsWithinTagAttributes(), so that any *'s or _'s in the -# and tags get encoded. -# - my $text = shift; - - # Clear the global hashes. If we don't clear these, you get conflicts - # from other articles when generating a page which contains more than - # one article (e.g. an index page that shows the N most recent - # articles): - %g_urls = (); - %g_titles = (); - %g_html_blocks = (); - %g_metadata = (); - %g_crossrefs = (); - %g_footnotes = (); - @g_used_footnotes = (); - $g_footnote_counter = 0; - @g_used_references = (); - %g_references = (); - $g_citation_counter = 0; - %g_attributes = (); - - - # Standardize line endings: - $text =~ s{\r\n}{\n}g; # DOS to Unix - $text =~ s{\r}{\n}g; # Mac to Unix - - # Make sure $text ends with a couple of newlines: - $text .= "\n\n"; - - # Convert all tabs to spaces. - $text = _Detab($text); - - # Strip any lines consisting only of spaces and tabs. - # This makes subsequent regexen easier to write, because we can - # match consecutive blank lines with /\n+/ instead of something - # contorted like /[ \t]*\n+/ . - $text =~ s/^[ \t]+$//mg; - - # Strip out MetaData - $text = _ParseMetaData($text) if $g_use_metadata; - - # And recheck for leading blank lines - $text =~ s/^\n+//s; - - # Turn block-level HTML blocks into hash entries - $text = _HashHTMLBlocks($text); - - # Strip footnote and link definitions, store in hashes. 
- $text = _StripFootnoteDefinitions($text); - - $text = _StripLinkDefinitions($text); - - _GenerateImageCrossRefs($text); - - $text = _StripMarkdownReferences($text); - - $text = _RunBlockGamut($text); - - $text = _DoMarkdownCitations($text); - - $text = _DoFootnotes($text); - - $text = _UnescapeSpecialChars($text); - - # Clean encoding within HTML comments - $text = _UnescapeComments($text); - - $text = _FixFootnoteParagraphs($text); - $text .= _PrintFootnotes(); - - $text .= _PrintMarkdownBibliography(); - - $text = _ConvertCopyright($text); - - if (lc($g_document_format) =~ /^complete\s*$/i) { - return xhtmlMetaData() . "\n\n" . $text . "\n\n"; - } elsif (lc($g_document_format) =~ /^snippet\s*$/i) { - return $text . "\n"; - } else { - return $g_document_format . textMetaData() . $text . "\n"; - } - -} - - -sub _StripLinkDefinitions { -# -# Strips link definitions from text, stores the URLs and titles in -# hash references. -# - my $text = shift; - my $less_than_tab = $g_tab_width - 1; - - # Link defs are in the form: ^[id]: url "optional title" - while ($text =~ s{ - # Pattern altered for MultiMarkdown - # in order to not match citations or footnotes - ^[ ]{0,$less_than_tab}\[([^#^].*)\]: # id = $1 - [ \t]* - \n? # maybe *one* newline - [ \t]* - ? # url = $2 - [ \t]* - \n? # maybe one newline - [ \t]* - (?: - (?<=\s) # lookbehind for whitespace - ["(] - (.+?) # title = $3 - [")] - [ \t]* - )? # title is optional - - # MultiMarkdown addition for attribute support - \n? - ( # Attributes = $4 - (?<=\s) # lookbehind for whitespace - (([ \t]*\n)?[ \t]*((\S+=\S+)|(\S+=".*?")))* - )? 
- [ \t]* - # /addition - (?:\n+|\Z) - } - {}mx) { -# $g_urls{lc $1} = _EncodeAmpsAndAngles( $2 ); # Link IDs are case-insensitive - $g_urls{lc $1} = $2; # Link IDs are case-insensitive - if ($3) { - $g_titles{lc $1} = $3; - $g_titles{lc $1} =~ s/"/"/g; - } - - # MultiMarkdown addition " - if ($4) { - $g_attributes{lc $1} = $4; - } - # /addition - } - - return $text; -} - -sub _StripHTML { - # Strip (X)HTML code from string - my $text = shift; - - $text =~ s/<.*?>//g; - - return $text; -} - -sub _HashHTMLBlocks { - my $text = shift; - my $less_than_tab = $g_tab_width - 1; - - # Hashify HTML blocks: - # We only want to do this for block-level HTML tags, such as headers, - # lists, and tables. That's because we still want to wrap

s around - # "paragraphs" that are wrapped in non-block-level tags, such as anchors, - # phrase emphasis, and spans. The list of tags we're looking for is - # hard-coded: - my $block_tags = qr{ - (?: - p | div | h[1-6] | blockquote | pre | table | - dl | ol | ul | script | noscript | form | - fieldset | iframe | ins | del - ) - }x; # MultiMarkdown does not include `math` in the above list so that - # Equations can optionally be included in separate paragraphs - - my $tag_attrs = qr{ - (?: # Match one attr name/value pair - \s+ # There needs to be at least some whitespace - # before each attribute name. - [\w.:_-]+ # Attribute name - \s*=\s* - (?: - ".+?" # "Attribute value" - | - '.+?' # 'Attribute value' - ) - )* # Zero or more - }x; - - my $empty_tag = qr{< \w+ $tag_attrs \s* />}xms; - my $open_tag = qr{< $block_tags $tag_attrs \s* >}xms; - my $close_tag = undef; # let Text::Balanced handle this - - use Text::Balanced qw(gen_extract_tagged); - my $extract_block = gen_extract_tagged($open_tag, $close_tag, undef, { ignore => [$empty_tag] }); - - my @chunks; - ## TO-DO: the 0,3 on the next line ought to respect the - ## tabwidth, or else, we should mandate 4-space tabwidth and - ## be done with it: - while ($text =~ s{^(([ ]{0,3}<)?.*\n)}{}m) { - my $cur_line = $1; - if (defined $2) { - # current line could be start of code block - - my ($tag, $remainder) = $extract_block->($cur_line . $text); - if ($tag) { - my $key = md5_hex($tag); - $g_html_blocks{$key} = $tag; - push @chunks, "\n\n" . $key . "\n\n"; - $text = $remainder; - } - else { - # No tag match, so toss $cur_line into @chunks - push @chunks, $cur_line; - } - } - else { - # current line could NOT be start of code block - push @chunks, $cur_line; - } - - } - push @chunks, $text; # Whatever is left. - - $text = join '', @chunks; - - - - # Special case just for


. It was easier to make a special case than - # to make the other regex more complicated. - $text =~ s{ - (?: - (?<=\n\n) # Starting after a blank line - | # or - \A\n? # the beginning of the doc - ) - ( # save in $1 - [ ]{0,$less_than_tab} - <(hr) # start tag = $2 - \b # word break - ([^<>])*? # - /?> # the matching end tag - [ \t]* - (?=\n{2,}|\Z) # followed by a blank line or end of document - ) - }{ - my $key = md5_hex($1); - $g_html_blocks{$key} = $1; - "\n\n" . $key . "\n\n"; - }egx; - - # Special case for standalone HTML comments: - $text =~ s{ - (?: - (?<=\n\n) # Starting after a blank line - | # or - \A\n? # the beginning of the doc - ) - ( # save in $1 - [ ]{0,$less_than_tab} - (?s: - - ) - [ \t]* - (?=\n{2,}|\Z) # followed by a blank line or end of document - ) - }{ - my $key = md5_hex($1); - $g_html_blocks{$key} = $1; - "\n\n" . $key . "\n\n"; - }egx; - - # PHP and ASP-style processor instructions ( and <%…%>) - $text =~ s{ - (?: - (?<=\n\n) # Starting after a blank line - | # or - \A\n? # the beginning of the doc - ) - ( # save in $1 - [ ]{0,$less_than_tab} - (?s: - <([?%]) # $2 - .*? - \2> - ) - [ \t]* - (?=\n{2,}|\Z) # followed by a blank line or end of document - ) - }{ - my $key = md5_hex($1); - $g_html_blocks{$key} = $1; - "\n\n" . $key . "\n\n"; - }egx; - - - return $text; -} - - -sub _RunBlockGamut { -# -# These are all the transformations that form block-level -# tags like paragraphs, headers, and list items. -# - my $text = shift; - - $text = _DoHeaders($text); - - # Do tables first to populate the table id's for cross-refs - # Escape
 so we don't get greedy with tables
-	$text = _DoTables($text);
-	
-	# And now, protect our tables
-	$text = _HashHTMLBlocks($text);
-
-	# Do Horizontal Rules:
-	$text =~ s{^[ ]{0,2}([ ]?\*[ ]?){3,}[ \t]*$}{\n tags around block-level tags.
-	$text = _HashHTMLBlocks($text);
-	$text = _FormParagraphs($text);
-
-	return $text;
-}
-
-
-sub _RunSpanGamut {
-#
-# These are all the transformations that occur *within* block-level
-# tags like paragraphs, headers, and list items.
-#
-	my $text = shift;
-
-	$text = _DoCodeSpans($text);
-	$text = _DoMathSpans($text);
-	$text = _EscapeSpecialCharsWithinTagAttributes($text);
-	$text = _EncodeBackslashEscapes($text);
-
-	# Process anchor and image tags. Images must come first,
-	# because ![foo][f] looks like an anchor.
-	$text = _DoImages($text);
-	$text = _DoAnchors($text);	
-
-	# Make links out of things like ``
-	# Must come after _DoAnchors(), because you can use < and >
-	# delimiters in inline links like [this]().
-	$text = _DoAutoLinks($text);
-	$text = _EncodeAmpsAndAngles($text);
-	$text = _DoItalicsAndBold($text);
-
-	# Do hard breaks:
-	$text =~ s/ {2,}\n/  -- encode [\ ` * _] so they
-# don't conflict with their use in Markdown for code, italics and strong.
-# We're replacing each such character with its corresponding MD5 checksum
-# value; this is likely overkill, but it should prevent us from colliding
-# with the escape values by accident.
-#
-	my $text = shift;
-	my $tokens ||= _TokenizeHTML($text);
-	$text = '';   # rebuild $text from the tokens
-
-	foreach my $cur_token (@$tokens) {
-		if ($cur_token->[0] eq "tag") {
-			$cur_token->[1] =~  s! \\ !$g_escape_table{'\\'}!gx;
-			$cur_token->[1] =~  s{ (?<=.)(?=.)  }{$g_escape_table{'`'}}gx;
-			$cur_token->[1] =~  s! \* !$g_escape_table{'*'}!gx;
-			$cur_token->[1] =~  s! _  !$g_escape_table{'_'}!gx;
-		}
-		$text .= $cur_token->[1];
-	}
-	return $text;
-}
-
-
-sub _DoAnchors {
-#
-# Turn Markdown link shortcuts into XHTML  tags.
-#
-	my $text = shift;
-
-	#
-	# First, handle reference-style links: [link text] [id]
-	#
-	$text =~ s{
-		(					# wrap whole match in $1
-		  \[
-		    ($g_nested_brackets)	# link text = $2
-		  \]
-
-		  [ ]?				# one optional space
-		  (?:\n[ ]*)?		# one optional newline followed by spaces
-
-		  \[
-		    (.*?)		# id = $3
-		  \]
-		)
-	}{
-		my $result;
-		my $whole_match = $1;
-		my $link_text   = $2;
-		my $link_id     = lc $3;
-
-		if ($link_id eq "") {
-			$link_id = lc $link_text;     # for shortcut links like [this][].
-		}
-
-		# Allow automatic cross-references to headers
-		my $label = Header2Label($link_id);
-		if (defined $g_urls{$link_id}) {
-			my $url = $g_urls{$link_id};
-			$url =~ s! \* !$g_escape_table{'*'}!gx;		# We've got to encode these to avoid
-			$url =~ s!  _ !$g_escape_table{'_'}!gx;		# conflicting with italics/bold.
-			$result = "$}{$1};					# Remove <>'s surrounding URL, if present
-		$result = " tags.
-#
-	my $text = shift;
-
-	#
-	# First, handle reference-style labeled images: ![alt text][id]
-	#
-	$text =~ s{
-		(				# wrap whole match in $1
-		  !\[
-		    (.*?)		# alt text = $2
-		  \]
-
-		  [ ]?				# one optional space
-		  (?:\n[ ]*)?		# one optional newline followed by spaces
-
-		  \[
-		    (.*?)		# id = $3
-		  \]
-
-		)
-	}{
-		my $result;
-		my $whole_match = $1;
-		my $alt_text    = $2;
-		my $link_id     = lc $3;
-
-		if ($link_id eq "") {
-			$link_id = lc $alt_text;     # for shortcut links like ![this][].
-		}
-
-		$alt_text =~ s/"/"/g;
-		if (defined $g_urls{$link_id}) {
-			my $url = $g_urls{$link_id};
-			$url =~ s! \* !$g_escape_table{'*'}!gx;		# We've got to encode these to avoid
-			$url =~ s!  _ !$g_escape_table{'_'}!gx;		# conflicting with italics/bold.
-			
-			my $label = Header2Label($alt_text);
-			$g_crossrefs{$label} = "#$label";
-			if (! defined $g_titles{$link_id}) {
-				$g_titles{$link_id} = $alt_text;
-			}
-			
-			$result = "\"$alt_text\"";$}{$1};					# Remove <>'s surrounding URL, if present
-
-		my $label = Header2Label($alt_text);
-		$g_crossrefs{$label} = "#$label";
-#		$g_titles{$label} = $alt_text;			# I think this line should not be here
-			
-		$result = "\"$alt_text\"";"  .  $header  .  "\n\n";
-	}egmx;
-
-	$text =~ s{ ^(.+?)(?:\s*(?"  .  $header  .  "\n\n";
-	}egmx;
-
-
-	# atx-style headers:
-	#	# Header 1
-	#	## Header 2
-	#	## Header 2 with closing hashes ##
-	#	...
-	#	###### Header 6
-	#
-	$text =~ s{
-			^(\#{1,6})	# $1 = string of #'s
-			[ \t]*
-			(.+?)		# $2 = Header text
-			[ \t]*
-			(?:(?"  .  $header  .  "\n\n";
-		}egmx;
-
-	return $text;
-}
-
-
-sub _DoLists {
-#
-# Form HTML ordered (numbered) and unordered (bulleted) lists.
-#
-	my $text = shift;
-	my $less_than_tab = $g_tab_width - 1;
-
-	# Re-usable patterns to match list item bullets and number markers:
-	my $marker_ul  = qr/[*+-]/;
-	my $marker_ol  = qr/\d+[.]/;
-	my $marker_any = qr/(?:$marker_ul|$marker_ol)/;
-
-	# Re-usable pattern to match any entirel ul or ol list:
-	my $whole_list = qr{
-		(								# $1 = whole list
-		  (								# $2
-			[ ]{0,$less_than_tab}
-			(${marker_any})				# $3 = first list item marker
-			[ \t]+
-		  )
-		  (?s:.+?)
-		  (								# $4
-			  \z
-			|
-			  \n{2,}
-			  (?=\S)
-			  (?!						# Negative lookahead for another list item marker
-				[ \t]*
-				${marker_any}[ \t]+
-			  )
-		  )
-		)
-	}mx;
-
-	# We use a different prefix before nested lists than top-level lists.
-	# See extended comment in _ProcessListItems().
-	#
-	# Note: There's a bit of duplication here. My original implementation
-	# created a scalar regex pattern as the conditional result of the test on
-	# $g_list_level, and then only ran the $text =~ s{...}{...}egmx
-	# substitution once, using the scalar as the pattern. This worked,
-	# everywhere except when running under MT on my hosting account at Pair
-	# Networks. There, this caused all rebuilds to be killed by the reaper (or
-	# perhaps they crashed, but that seems incredibly unlikely given that the
-	# same script on the same server ran fine *except* under MT. I've spent
-	# more time trying to figure out why this is happening than I'd like to
-	# admit. My only guess, backed up by the fact that this workaround works,
-	# is that Perl optimizes the substition when it can figure out that the
-	# pattern will never change, and when this optimization isn't on, we run
-	# afoul of the reaper. Thus, the slightly redundant code that uses two
-	# static s/// patterns rather than one conditional pattern.
-
-	if ($g_list_level) {
-		$text =~ s{
-				^
-				$whole_list
-			}{
-				my $list = $1;
-				my $list_type = ($3 =~ m/$marker_ul/) ? "ul" : "ol";
-
-				# Turn double returns into triple returns, so that we can make a
-				# paragraph for the last item in a list, if necessary:
-				$list =~ s/\n{2,}/\n\n\n/g;
-				my $result = _ProcessListItems($list, $marker_any);
-
-				# Trim any trailing whitespace, to put the closing ``
-				# up on the preceding line, to get it past the current stupid
-				# HTML block parser. This is a hack to work around the terrible
-				# hack that is the HTML block parser.
-				$result =~ s{\s+$}{};
-				$result = "<$list_type>" . $result . "\n";
-				$result;
-			}egmx;
-	}
-	else {
-		$text =~ s{
-				(?:(?<=\n\n)|\A\n?)
-				$whole_list
-			}{
-				my $list = $1;
-				my $list_type = ($3 =~ m/$marker_ul/) ? "ul" : "ol";
-				# Turn double returns into triple returns, so that we can make a
-				# paragraph for the last item in a list, if necessary:
-				$list =~ s/\n{2,}/\n\n\n/g;
-				my $result = _ProcessListItems($list, $marker_any);
-				$result = "<$list_type>\n" . $result . "\n";
-				$result;
-			}egmx;
-	}
-
-
-	return $text;
-}
-
-
-sub _ProcessListItems {
-#
-#	Process the contents of a single ordered or unordered list, splitting it
-#	into individual list items.
-#
-
-	my $list_str = shift;
-	my $marker_any = shift;
-
-
-	# The $g_list_level global keeps track of when we're inside a list.
-	# Each time we enter a list, we increment it; when we leave a list,
-	# we decrement. If it's zero, we're not in a list anymore.
-	#
-	# We do this because when we're not inside a list, we want to treat
-	# something like this:
-	#
-	#		I recommend upgrading to version
-	#		8. Oops, now this line is treated
-	#		as a sub-list.
-	#
-	# As a single paragraph, despite the fact that the second line starts
-	# with a digit-period-space sequence.
-	#
-	# Whereas when we're inside a list (or sub-list), that line will be
-	# treated as the start of a sub-list. What a kludge, huh? This is
-	# an aspect of Markdown's syntax that's hard to parse perfectly
-	# without resorting to mind-reading. Perhaps the solution is to
-	# change the syntax rules such that sub-lists must start with a
-	# starting cardinal number; e.g. "1." or "a.".
-
-	$g_list_level++;
-
-	# trim trailing blank lines:
-	$list_str =~ s/\n{2,}\z/\n/;
-
-
-	$list_str =~ s{
-		(\n)?							# leading line = $1
-		(^[ \t]*)						# leading whitespace = $2
-		($marker_any) [ \t]+			# list marker = $3
-		((?s:.+?)						# list item text   = $4
-		(\n{1,2}))
-		(?= \n* (\z | \2 ($marker_any) [ \t]+))
-	}{
-		my $item = $4;
-		my $leading_line = $1;
-		my $leading_space = $2;
-
-		if ($leading_line or ($item =~ m/\n{2,}/)) {
-			$item = _RunBlockGamut(_Outdent($item));
-		}
-		else {
-			# Recursion for sub-lists:
-			$item = _DoLists(_Outdent($item));
-			chomp $item;
-			$item = _RunSpanGamut($item);
-		}
-
-		"
  • " . $item . "
  • \n"; - }egmx; - - $g_list_level--; - return $list_str; -} - - - -sub _DoCodeBlocks { -# -# Process Markdown `
    ` blocks.
    -#	
    -
    -	my $text = shift;
    -
    -	$text =~ s{
    -			(?:\n\n|\A)
    -			(	            # $1 = the code block -- one or more lines, starting with a space/tab
    -			  (?:
    -			    (?:[ ]{$g_tab_width} | \t)  # Lines must start with a tab or a tab-width of spaces
    -			    .*\n+
    -			  )+
    -			)
    -			((?=^[ ]{0,$g_tab_width}\S)|\Z)	# Lookahead for non-space at line-start, or end of doc
    -		}{
    -			my $codeblock = $1;
    -			my $result; # return value
    -
    -			$codeblock = _EncodeCode(_Outdent($codeblock));
    -			$codeblock = _Detab($codeblock);
    -			$codeblock =~ s/\A\n+//; # trim leading newlines
    -			$codeblock =~ s/\n+\z//; # trim trailing newlines
    -
    -			$result = "\n\n
    " . $codeblock . "
    \n\n"; # CHANGED: Removed newline for MMD - - $result; - }egmx; - - return $text; -} - - -sub _DoCodeSpans { -# -# * Backtick quotes are used for spans. -# -# * You can use multiple backticks as the delimiters if you want to -# include literal backticks in the code span. So, this input: -# -# Just type ``foo `bar` baz`` at the prompt. -# -# Will translate to: -# -#

    Just type foo `bar` baz at the prompt.

    -# -# There's no arbitrary limit to the number of backticks you -# can use as delimters. If you need three consecutive backticks -# in your code, use four for delimiters, etc. -# -# * You can use spaces to get literal backticks at the edges: -# -# ... type `` `bar` `` ... -# -# Turns to: -# -# ... type `bar` ... -# - - my $text = shift; - - $text =~ s@ - (?$c
    "; - @egsx; - - return $text; -} - - -sub _EncodeCode { -# -# Encode/escape certain characters inside Markdown code runs. -# The point is that in code, these characters are literals, -# and lose their special Markdown meanings. -# - local $_ = shift; - - # Encode all ampersands; HTML entities are not - # entities within a Markdown code span. - s/&/&/g; - - # Encode $'s, but only if we're running under Blosxom. - # (Blosxom interpolates Perl variables in article bodies.) - { - no warnings 'once'; - if (defined($blosxom::version)) { - s/\$/$/g; - } - } - - - # Do the angle bracket song and dance: - s! < !<!gx; - s! > !>!gx; - - # Now, escape characters that are magic in Markdown: - s! \* !$g_escape_table{'*'}!gx; - s! _ !$g_escape_table{'_'}!gx; - s! { !$g_escape_table{'{'}!gx; - s! } !$g_escape_table{'}'}!gx; - s! \[ !$g_escape_table{'['}!gx; - s! \] !$g_escape_table{']'}!gx; - s! \\ !$g_escape_table{'\\'}!gx; - - return $_; -} - - -sub _DoItalicsAndBold { - my $text = shift; - - # Cave in - `*` and `_` behave differently... We'll see how it works out - - - # must go first: - $text =~ s{ (?$2}gsx; - - $text =~ s{ (?$2}gsx; - - # And now, a second pass to catch nested strong and emphasis special cases - $text =~ s{ (?$2}gsx; - - $text =~ s{ (?$2}gsx; - - # And now, allow `*` in the middle of words - - # must go first: - $text =~ s{ (\*\*) (?=\S) (.+?[*]*) (?<=\S) \1 } - {$2}gsx; - - $text =~ s{ (\*) (?=\S) (.+?) (?<=\S) \1 } - {$2}gsx; - - return $text; -} - - -sub _DoBlockQuotes { - my $text = shift; - - $text =~ s{ - ( # Wrap whole match in $1 - ( - ^[ \t]*>[ \t]? # '>' at the start of a line - .+\n # rest of the first line - (.+\n)* # subsequent consecutive lines - \n* # blanks - )+ - ) - }{ - my $bq = $1; - $bq =~ s/^[ \t]*>[ \t]?//gm; # trim one level of quoting - $bq =~ s/^[ \t]+$//mg; # trim whitespace-only lines - $bq = _RunBlockGamut($bq); # recurse - - $bq =~ s/^/ /g; - # These leading spaces screw with
     content, so we need to fix that:
    -			$bq =~ s{
    -					(\s*
    .+?
    ) - }{ - my $pre = $1; - $pre =~ s/^ //mg; - $pre; - }egsx; - - "
    \n$bq\n
    \n\n"; - }egmx; - - - return $text; -} - - -sub _FormParagraphs { -# -# Params: -# $text - string to process with html

    tags -# - my $text = shift; - - # Strip leading and trailing lines: - $text =~ s/\A\n+//; - $text =~ s/\n+\z//; - - my @grafs = split(/\n{2,}/, $text); - - # - # Wrap

    tags. - # - foreach (@grafs) { - unless (defined( $g_html_blocks{$_} )) { - $_ = _RunSpanGamut($_); - s/^([ \t]*)/

    /; - $_ .= "

    "; - } - } - - # - # Unhashify HTML blocks - # -# foreach my $graf (@grafs) { -# my $block = $g_html_blocks{$graf}; -# if (defined $block) { -# $graf = $block; -# } -# } - - foreach my $graf (@grafs) { - # Modify elements of @grafs in-place... - my $block = $g_html_blocks{$graf}; - if (defined $block) { - $graf = $block; - if ($block =~ m{ - \A - ( # $1 =
    tag -
    ]* - \b - markdown\s*=\s* (['"]) # $2 = attr quote char - 1 - \2 - [^>]* - > - ) - ( # $3 = contents - .* - ) - (
    ) # $4 = closing tag - \z - - }xms - ) { - my ($div_open, $div_content, $div_close) = ($1, $3, $4); - - # We can't call Markdown(), because that resets the hash; - # that initialization code should be pulled into its own sub, though. - $div_content = _HashHTMLBlocks($div_content); - $div_content = _StripLinkDefinitions($div_content); - $div_content = _RunBlockGamut($div_content); - $div_content = _UnescapeSpecialChars($div_content); - - $div_open =~ s{\smarkdown\s*=\s*(['"]).+?\1}{}ms; - - $graf = $div_open . "\n" . $div_content . "\n" . $div_close; - } - } - } - - - return join "\n\n", @grafs; -} - - -sub _EncodeAmpsAndAngles { -# Smart processing for ampersands and angle brackets that need to be encoded. - - my $text = shift; - - # Ampersand-encoding based entirely on Nat Irons's Amputator MT plugin: - # http://bumppo.net/projects/amputator/ - $text =~ s/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/&/g; - - # Encode naked <'s - $text =~ s{<(?![a-z/?\$!])}{<}gi; - - return $text; -} - - -sub _EncodeBackslashEscapes { -# -# Parameter: String. -# Returns: The string, with after processing the following backslash -# escape sequences. -# - local $_ = shift; - - s! \\\\ !$g_escape_table{'\\'}!gx; # Must process escaped backslashes first. - s! \\` !$g_escape_table{'`'}!gx; - s! \\\* !$g_escape_table{'*'}!gx; - s! \\_ !$g_escape_table{'_'}!gx; - s! \\\{ !$g_escape_table{'{'}!gx; - s! \\\} !$g_escape_table{'}'}!gx; - s! \\\[ !$g_escape_table{'['}!gx; - s! \\\] !$g_escape_table{']'}!gx; - s! \\\( !$g_escape_table{'('}!gx; - s! \\\) !$g_escape_table{')'}!gx; - s! \\> !$g_escape_table{'>'}!gx; - s! \\\# !$g_escape_table{'#'}!gx; - s! \\\+ !$g_escape_table{'+'}!gx; - s! \\\- !$g_escape_table{'-'}!gx; - s! \\\. !$g_escape_table{'.'}!gx; - s{ \\! }{$g_escape_table{'!'}}gx; - - return $_; -} - - -sub _DoAutoLinks { - my $text = shift; - - $text =~ s{<((https?|ftp|dict):[^'">\s]+)>}{
    $1}gi; - - # Email addresses: - $text =~ s{ - < - (?:mailto:)? - ( - [-.\w]+ - \@ - [-a-z0-9]+(\.[-a-z0-9]+)*\.[a-z]+ - ) - > - }{ - _EncodeEmailAddress( _UnescapeSpecialChars($1) ); - }egix; - - return $text; -} - - -sub _EncodeEmailAddress { -# -# Input: an email address, e.g. "foo@example.com" -# -# Output: the email address as a mailto link, with each character -# of the address encoded as either a decimal or hex entity, in -# the hopes of foiling most address harvesting spam bots. E.g.: -# -# foo -# @example.com -# -# Based on a filter by Matthew Wickline, posted to the BBEdit-Talk -# mailing list: -# - - my $addr = shift; - - srand; - my @encode = ( - sub { '&#' . ord(shift) . ';' }, - sub { '&#x' . sprintf( "%X", ord(shift) ) . ';' }, - sub { shift }, - ); - - $addr = "mailto:" . $addr; - - $addr =~ s{(.)}{ - my $char = $1; - if ( $char eq '@' ) { - # this *must* be encoded. I insist. - $char = $encode[int rand 1]->($char); - } elsif ( $char ne ':' ) { - # leave ':' alone (to spot mailto: later) - my $r = rand; - # roughly 10% raw, 45% hex, 45% dec - $char = ( - $r > .9 ? $encode[2]->($char) : - $r < .45 ? $encode[1]->($char) : - $encode[0]->($char) - ); - } - $char; - }gex; - - $addr = qq{$addr}; - $addr =~ s{">.+?:}{">}; # strip the mailto: from the visible part - - return $addr; -} - - -sub _UnescapeSpecialChars { -# -# Swap back in all the special characters we've hidden. -# - my $text = shift; - - while( my($char, $hash) = each(%g_escape_table) ) { - $text =~ s/$hash/$char/g; - } - return $text; -} - - -sub _TokenizeHTML { -# -# Parameter: String containing HTML markup. -# Returns: Reference to an array of the tokens comprising the input -# string. Each token is either a tag (possibly with nested, -# tags contained therein, such as , or a -# run of text between tags. Each element of the array is a -# two-element array; the first is either 'tag' or 'text'; -# the second is the actual value. 
-# -# -# Derived from the _tokenize() subroutine from Brad Choate's MTRegex plugin. -# -# - - my $str = shift; - my $pos = 0; - my $len = length $str; - my @tokens; - - my $depth = 6; - my $nested_tags = join('|', ('(?:<[a-z/!$](?:[^<>]') x $depth) . (')*>)' x $depth); - my $match = qr/(?s: ) | # comment - (?s: <\? .*? \?> ) | # processing instruction - $nested_tags/ix; # nested tags - - while ($str =~ m/($match)/g) { - my $whole_tag = $1; - my $sec_start = pos $str; - my $tag_start = $sec_start - length $whole_tag; - if ($pos < $tag_start) { - push @tokens, ['text', substr($str, $pos, $tag_start - $pos)]; - } - push @tokens, ['tag', $whole_tag]; - $pos = pos $str; + print markdown($text, \%opts); } - push @tokens, ['text', substr($str, $pos, $len - $pos)] if $pos < $len; - - return \@tokens; -} - - -sub _Outdent { -# -# Remove one level of line-leading tabs or spaces -# - my $text = shift; - - $text =~ s/^(\t|[ ]{1,$g_tab_width})//gm; - return $text; } - -sub _Detab { -# -# Cribbed from a post by Bart Lateur: -# -# - my $text = shift; - - $text =~ s{(.*?)\t}{$1.(' ' x ($g_tab_width - length($1) % $g_tab_width))}ge; - return $text; -} - -# -# MultiMarkdown Routines -# - -sub _ParseMetaData { - my $text = shift; - my $clean_text = ""; - - my ($inMetaData, $currentKey) = (1,''); - - # If only metadata is "Format: complete" then skip - - if ($text =~ s/^(Format):\s*complete\n(.*?)\n/$2\n/is) { - # If "Format: complete" was added automatically, don't force first - # line of text to be metadata - $g_metadata{$1}= "complete"; - $g_document_format = "complete"; - } - - foreach my $line ( split /\n/, $text ) { - $line =~ /^$/ and $inMetaData = 0; - if ($inMetaData) { - if ($line =~ /^([a-zA-Z0-9][0-9a-zA-Z _-]*?):\s*(.*)$/ ) { - $currentKey = $1; - my $meta = $2; - $currentKey =~ s/\s+/ /g; - $currentKey =~ s/\s$//; - $g_metadata{$currentKey} = $meta; - if (lc($currentKey) eq "format") { - $g_document_format = lc($g_metadata{$currentKey}); - } - if (lc($currentKey) eq 
"base url") { - $g_base_url = $g_metadata{$currentKey}; - } - if (lc($currentKey) eq "bibliography title") { - $g_bibliography_title = $g_metadata{$currentKey}; - $g_bibliography_title =~ s/\s*$//; - } - if (lc($currentKey) eq "base header level") { - $g_base_header_level = $g_metadata{$currentKey}; - } - if (!$g_metadata_newline{$currentKey}) { - $g_metadata_newline{$currentKey} = $g_metadata_newline{default}; - } - } else { - if ($currentKey eq "") { - # No metadata present - $clean_text .= "$line\n"; - $inMetaData = 0; - next; - } - if ($line =~ /^\s*(.+)$/ ) { - $g_metadata{$currentKey} .= "$g_metadata_newline{$currentKey}$1"; - } - } - } else { - $clean_text .= "$line\n"; - } - } - - return $clean_text; -} - -sub _StripFootnoteDefinitions { - my $text = shift; - my $less_than_tab = $g_tab_width - 1; - - while ($text =~ s{ - \n[ ]{0,$less_than_tab}\[\^([^\n]+?)\]\:[ \t]*# id = $1 - \n? - (.*?)\n{1,2} # end at new paragraph - ((?=\n[ ]{0,$less_than_tab}\S)|\Z) # Lookahead for non-space at line-start, or end of doc - } - {\n}sx) - { - my $id = $1; - my $footnote = "$2\n"; - $footnote =~ s/^[ ]{0,$g_tab_width}//gm; - - $g_footnotes{id2footnote($id)} = $footnote; - } - - return $text; -} - -sub _DoFootnotes { - my $text = shift; - - # First, run routines that get skipped in footnotes - foreach my $label (sort keys %g_footnotes) { - my $footnote = _RunBlockGamut($g_footnotes{$label}); - - $footnote = _DoMarkdownCitations($footnote); - $g_footnotes{$label} = $footnote; - } - - $text =~ s{ - \[\^(.+?)\] # id = $1 - }{ - my $result = ""; - my $id = id2footnote($1); - if (defined $g_footnotes{$id} ) { - $g_footnote_counter++; - if ($g_footnotes{$id} =~ /^(

    )?glossary:/i) { - $result = "$g_footnote_counter"; - } else { - $result = "$g_footnote_counter"; - } - push (@g_used_footnotes,$id); - } - $result; - }xsge; - - return $text; -} - -sub _FixFootnoteParagraphs { - my $text = shift; - - $text =~ s/^\\<\/footnote\>/<\/footnote>/gm; - - return $text; -} - -sub _PrintFootnotes{ - my $footnote_counter = 0; - my $result = ""; - - foreach my $id (@g_used_footnotes) { - $footnote_counter++; - my $footnote = $g_footnotes{$id}; - my $footnote_closing_tag = ""; - - $footnote =~ s/(\<\/(p(re)?|ol|ul)\>)$//; - $footnote_closing_tag = $1; - - if ($footnote =~ s/^(

    )?glossary:\s*//i) { - # Add some formatting for glossary entries - - $footnote =~ s{ - ^(.*?) # $1 = term - \s* - (?:\(([^\(\)]*)\)[^\n]*)? # $2 = optional sort key - \n - }{ - my $glossary = "$1"; - - if ($2) { - $glossary.="$2"; - }; - - $glossary . ":

    "; - }egsx; - - $result.="

  • $footnote ↩$footnote_closing_tag
  • \n\n"; - } else { - $result.="
  • $footnote ↩$footnote_closing_tag
  • \n\n"; - } - } - $result .= "\n
    "; - - if ($footnote_counter > 0) { - $result = "\n\n
    \n\n\n".$result; - } else { - $result = ""; - } - - $result= _UnescapeSpecialChars($result); - return $result; -} - -sub Header2Label { - my $header = shift; - my $label = lc $header; - $label =~ s/[^A-Za-z0-9:_.-]//g; # Strip illegal characters - while ($label =~ s/^[^A-Za-z]//g) - {}; # Strip illegal leading characters - return $label; -} - -sub id2footnote { - # Since we prepend "fn:", we can allow leading digits in footnotes - my $id = shift; - my $footnote = lc $id; - $footnote =~ s/[^A-Za-z0-9:_.-]//g; # Strip illegal characters - return $footnote; -} - - -sub xhtmlMetaData { - my $result = qq{\n}; - - # This screws up xsltproc - make sure to use `-nonet -novalid` if you - # have difficulty - if ($g_allow_mathml) { - $result .= qq{ -\n}; - - $result.= qq{\n\t\n}; - } else { - $result .= qq{\n}; - - $result.= qq!\n\t\n!; - } - - $result.= "\t\t\n"; - - foreach my $key (sort keys %g_metadata ) { - # Strip trailing spaces - $g_metadata{$key} =~ s/(\s)*$//s; - - # Strip spaces from key - my $export_key = $key; - $export_key =~ s/\s//g; - - if (lc($key) eq "title") { - $result.= "\t\t" . _EncodeAmpsAndAngles($g_metadata{$key}) . 
"\n"; - } elsif (lc($key) eq "css") { - $result.= "\t\t\n/g; - - if ($result ne "") { - $result.= "\n"; - } - - return $result; -} - -sub _ConvertCopyright{ - my $text = shift; - # Convert to an XML compatible form of copyright symbol - - $text =~ s/©/©/gi; - - return $text; -} - - -sub _DoTables { - my $text = shift; - my $less_than_tab = $g_tab_width - 1; - - # Algorithm inspired by PHP Markdown Extra's table support - # - - # Reusable regexp's to match table - - my $line_start = qr{ - [ ]{0,$less_than_tab} - }mx; - - my $table_row = qr{ - [^\n]*?\|[^\n]*?\n - }mx; - - my $first_row = qr{ - $line_start - \S+.*?\|.*?\n - }mx; - - my $table_rows = qr{ - (\n?$table_row) - }mx; - - my $table_caption = qr{ - $line_start - \[.*?\][ \t]*\n - }mx; - - my $table_divider = qr{ - $line_start - [\|\-\+\:\.][ \-\+\|\:\.]* \| [ \-\+\|\:\.]* - }mx; - - my $whole_table = qr{ - ($table_caption)? # Optional caption - ($first_row # First line must start at beginning - ($table_row)*?)? # Header Rows - $table_divider # Divider/Alignment definitions - $table_rows+ # Body Rows - ($table_caption)? 
# Optional caption - }mx; - - - # Find whole tables, then break them up and process them - - $text =~ s{ - ^($whole_table) # Whole table in $1 - (\n|\Z) # End of file or 2 blank lines - }{ - my $table = $1; - - # Clean extra spaces at end of lines - - # they cause the processing to choke - $table =~ s/[\t ]*\n/\n/gs; - - my $result = "\n"; - my @alignments; - my $use_row_header = 1; - - # Add Caption, if present - - if ($table =~ s/^$line_start(?:\[\s*(.*)\s*\])?(?:\[\s*(.*?)\s*\])[ \t]*$//m) { - my $table_id = ""; - my $table_caption = ""; - - $table_id = Header2Label($2); - - if (defined $1) { - $table_caption = $1; - } else { - $table_caption = $2; - } - $result .= "\n"; - - $g_crossrefs{$table_id} = "#$table_id"; - $g_titles{$table_id} = "see table"; # captions with "stuff" in them break links - } - - # If a second "caption" is present, treat it as a summary - # However, this is not valid in XHTML 1.0 Strict - # But maybe in future - - # A summary might be longer than one line - if ($table =~ s/\n$line_start\[\s*(.*?)\s*\][ \t]*\n/\n/s) { - # $result .= "" . _RunSpanGamut($1) . "\n"; - } - - # Now, divide table into header, alignment, and body - - # First, add leading \n in case there is no header - - $table = "\n" . $table; - - # Need to be greedy - - $table =~ s/\n($table_divider)\n(($table_rows)+)//s; - - my $body = ""; - my $alignment_string = ""; - if (defined $1){ - $alignment_string = $1; - } - if (defined $2){ - $body = $2; - } - - # Process column alignment - while ($alignment_string =~ /\|?\s*(.+?)\s*(\||\Z)/gs) { - my $cell = _RunSpanGamut($1); - if ($cell =~ /\+/){ - $result .= "$cell\n"; - } else { - $result .= "\t<$cell_type$colspan>$cell\n"; - } - $count++; - } - $result .= "\n"; - } - - # Strip out empty sections - $result =~ s/\s*<\/thead>\s*//s; - - # Handle pull-quotes - - # This might be too specific for my needs. If others want it - # removed, I am open to discussion. - - $result =~ s/
    " . _RunSpanGamut($table_caption). "
    \s*\s*/
    \n\n/s; - - $result .= "\n
    \n"; - $result - }egmx; - - my $table_body = qr{ - ( # wrap whole match in $2 - - (.*?\|.*?)\n # wrap headers in $3 - - [ ]{0,$less_than_tab} - ($table_divider) # alignment in $4 - - ( # wrap cells in $5 - $table_rows - ) - ) - }mx; - - return $text; -} - - -sub _DoAttributes{ - my $id = shift; - my $result = ""; - - if (defined $g_attributes{$id}) { - my $attributes = $g_attributes{$id}; - while ($attributes =~ s/(\S+)="(.*?)"//) { - $result .= " $1=\"$2\""; - } - while ($attributes =~ /(\S+)=(\S+)/g) { - $result .= " $1=\"$2\""; - } - } - - return $result; -} - - -sub _StripMarkdownReferences { - my $text = shift; - my $less_than_tab = $g_tab_width - 1; - - while ($text =~ s{ - \n\[\#(.+?)\]:[ \t]* # id = $1 - \n? - (.*?)\n{1,2} # end at new paragraph - ((?=\n[ ]{0,$less_than_tab}\S)|\Z) # Lookahead for non-space at line-start, or end of doc - } - {\n}sx) - { - my $id = $1; - my $reference = "$2\n"; - - $reference =~ s/^[ ]{0,$g_tab_width}//gm; - - $reference = _RunBlockGamut($reference); - - # strip leading and trailing

    tags (they will be added later) - $reference =~ s/^\//s; - $reference =~ s/\<\/p\>\s*$//s; - - $g_references{$id} = $reference; - } - - return $text; -} - -sub _DoMarkdownCitations { - my $text = shift; - - $text =~ s{ # Allow for citations without locator to be written - \[\#([^\[]*?)\] # in usual manner, e.g. [#author][] rather than - [ ]? # [][#author] - (?:\n[ ]*)? - \[\s*\] - }{ - "[][#$1]"; - }xsge; - - $text =~ s{ - \[([^\[]*?)\] # citation text = $1 - [ ]? # one optional space - (?:\n[ ]*)? # one optional newline followed by spaces - \[\#(.*?)\] # id = $2 - }{ - my $result; - my $anchor_text = $1; - my $id = $2; - my $count; - - # implement equivalent to \citet - my $textual_string = ""; - if ($anchor_text =~ s/^(.*?);\s*//) { - $textual_string = "$1"; - } - - if (defined $g_references{$id} ) { - my $citation_counter=0; - - # See if citation has been used before - foreach my $old_id (@g_used_references) { - $citation_counter++; - $count = $citation_counter if ($old_id eq $id); - } - - if (! defined $count) { - $g_citation_counter++; - $count = $g_citation_counter; - push (@g_used_references,$id); - } - - $result = "$textual_string ($count"; - - if ($anchor_text ne "") { - $result .=", $anchor_text"; - } - - $result .= ")"; - } else { - # No reference exists - $result = "$textual_string ($id"; - - if ($anchor_text ne "") { - $result .=", $anchor_text"; - } - - $result .= ")"; - } - - if (Header2Label($anchor_text) eq "notcited"){ - $result = ""; - } - $result; - }xsge; - - return $text; - -} - -sub _PrintMarkdownBibliography{ - my $citation_counter = 0; - my $result; - - foreach my $id (@g_used_references) { - $citation_counter++; - $result.="

    [$citation_counter] $g_references{$id}

    \n\n"; - } - $result .= "
    "; - - if ($citation_counter > 0) { - $result = "\n\n
    \n$g_bibliography_title

    \n\n".$result; - } else { - $result = ""; - } - - return $result; -} - -sub _GenerateImageCrossRefs { - my $text = shift; - - # - # First, handle reference-style labeled images: ![alt text][id] - # - $text =~ s{ - ( # wrap whole match in $1 - !\[ - (.*?) # alt text = $2 - \] - - [ ]? # one optional space - (?:\n[ ]*)? # one optional newline followed by spaces - - \[ - (.*?) # id = $3 - \] - - ) - }{ - my $result; - my $whole_match = $1; - my $alt_text = $2; - my $link_id = lc $3; - - if ($link_id eq "") { - $link_id = lc $alt_text; # for shortcut links like ![this][]. - } - - $alt_text =~ s/"/"/g; - if (defined $g_urls{$link_id}) { - my $label = Header2Label($alt_text); - $g_crossrefs{$label} = "#$label"; - } - else { - # If there's no such link ID, leave intact: - $result = $whole_match; - } - - $whole_match; - }xsge; - - # - # Next, handle inline images: ![alt text](url "optional title") - # Don't forget: encode * and _ - - $text =~ s{ - ( # wrap whole match in $1 - !\[ - (.*?) # alt text = $2 - \] - \( # literal paren - [ \t]* - ? # src url = $3 - [ \t]* - ( # $4 - (['"]) # quote char = $5 ' - (.*?) # title = $6 - \5 # matching quote - [ \t]* - )? # title is optional - \) - ) - }{ - my $result; - my $whole_match = $1; - my $alt_text = $2; - - $alt_text =~ s/"/"/g; - my $label = Header2Label($alt_text); - $g_crossrefs{$label} = "#$label"; - $whole_match; - }xsge; - - return $text; -} - -sub _FindMathEquations{ - my $text = shift; - - $text =~ s{ - (\]*)id=\"(.*?)\"> # " - }{ - my $label = Header2Label($2); - my $header = _RunSpanGamut($2); - - $g_crossrefs{$label} = "#$label"; - $g_titles{$label} = $header; - - $1 . 
"id=\"$label\">"; - }xsge; - - return $text; -} - -sub _DoMathSpans { - # Based on Gruber's _DoCodeSpans - - my $text = shift; - my $display_as_block = 0; - $display_as_block = 1 if ($text =~ /^<<[^\>\>]*>>$/); - - $text =~ s{ - (?>) - }{ - my $m = "$2"; - my $label = ""; - my @attr = (xmlns=>"http://www.w3.org/1998/Math/MathML"); - - if (defined $3) { - $label = Header2Label($3); - my $header = _RunSpanGamut($3); - - $g_crossrefs{$label} = "#$label"; - $g_titles{$label} = $header; - } - $m =~ s/^[ \t]*//g; # leading whitespace - $m =~ s/[ \t]*$//g; # trailing whitespace - push(@attr,(id=>"$label")) if ($label ne ""); - push(@attr,(display=>"block")) if ($display_as_block == 1); - - $m = $mathParser->TextToMathML($m,\@attr); - "$m"; - }egsx; - - return $text; -} - -sub _DoDefinitionLists { - # Uses the syntax proposed by Michel Fortin in PHP Markdown Extra - - my $text = shift; - my $less_than_tab = $g_tab_width -1; - - my $line_start = qr{ - [ ]{0,$less_than_tab} - }mx; - - my $term = qr{ - $line_start - [^:\s][^\n]*\n - }sx; - - my $definition = qr{ - \n?[ ]{0,$less_than_tab} - \:[ \t]+(.*?)\n - ((?=\n?\:)|\n|\Z) # Lookahead for next definition, two returns, - # or the end of the document - }sx; - - my $definition_block = qr{ - ((?:$term)+) # $1 = one or more terms - ((?:$definition)+) # $2 = by one or more definitions - }sx; - - my $definition_list = qr{ - (?:$definition_block\n*)+ # One ore more definition blocks - }sx; - - $text =~ s{ - ($definition_list) # $1 = the whole list - }{ - my $list = $1; - my $result = $1; - - $list =~ s{ - (?:$definition_block)\n* - }{ - my $terms = $1; - my $defs = $2; - - $terms =~ s{ - [ ]{0,$less_than_tab} - (.*) - \s* - }{ - my $term = $1; - my $result = ""; - $term =~ s/^\s*(.*?)\s*$/$1/; - if ($term !~ /^\s*$/){ - $result = "
    " . _RunSpanGamut($1) . "
    \n"; - } - $result; - }xmge; - - $defs =~ s{ - $definition - }{ - my $def = $1 . "\n"; - $def =~ s/^[ ]{0,$g_tab_width}//gm; - "
    \n" . _RunBlockGamut($def) . "\n
    \n"; - }xsge; - - $terms . $defs . "\n"; - }xsge; - - "
    \n" . $list . "
    \n\n"; - }xsge; - - return $text -} - -sub _UnescapeComments{ - # Remove encoding inside comments - # Based on proposal by Toras Doran (author of Text::MultiMarkdown) - - my $text = shift; - $text =~ s{ - (?<=) # End comments - }{ - my $t = $1; - $t =~ s/&/&/g; - $t =~ s/</join($me, 'MultiMarkdown.pl') ) { + $MMDPath = dirname($me); } else { - # We're running Mac OS X or some *nix - - # First check our directory to see if we're running inside MMD - - if ( -f "$me/MultiMarkdown/Support.pm") { - $MMDPath = "$me/.."; - } - # Next, look in user's home directory, then in common directories - if ($MMDPath eq "") { + if ($os =~ /MSWin/) { + # We're running Windows + push @candidates, "$ENV{HOMEDRIVE}$ENV{HOMEPATH}\\MultiMarkdown"; + push @candidates, "$ENV{HOMEDRIVE}\\Documents and Settings\\All Users\\MultiMarkdown"; + + } else { + # We're running Mac OS X or some *nix if (defined($ENV{HOME})) { - if ( -d "$ENV{HOME}/Library/Application Support/MultiMarkdown") { - $MMDPath = "$ENV{HOME}/Library/Application Support/MultiMarkdown"; - } elsif ( -d "$ENV{HOME}/.multimarkdown") { - $MMDPath = "$ENV{HOME}/.multimarkdown"; - } - } - if ($MMDPath eq "") { - if ( -d "/Library/Application Support/MultiMarkdown") { - $MMDPath = "/Library/Application Support/MultiMarkdown"; - } elsif ( -d "/usr/share/multimarkdown") { - $MMDPath = "/usr/share/multimarkdown"; - } + push @candidates, "$ENV{HOME}/Library/Application Support/MultiMarkdown"; + push @candidates, "$ENV{HOME}/.multimarkdown"; + push @candidates, "/Library/Application Support/MultiMarkdown"; + push @candidates, "/usr/share/multimarkdown"; } } } + foreach (@candidates) { + if (-d $_) { + $MMDPath = $_; + last; + } + } + if ($MMDPath eq "") { die "You do not appear to have MultiMarkdown installed.\n"; } else { # Load the MultiMarkdown::Support.pm module $MMDPath = abs_path($MMDPath); - LoadModule("$MMDPath/bin/MultiMarkdown/Support.pm"); + unshift (@INC, File::Spec->join($MMDPath, 'lib')); + require MultiMarkdown::Support; 
} - # Clean up the path - $MMDPath = abs_path($MMDPath); - return $MMDPath; } -sub LoadModule { - my $file = shift; - my $os = $^O; # Mac = darwin; Linux = linux; Windows contains MSWin - - if ($os =~ /MSWin/) { - # Not sure what I can do here - } else { - unless (my $return = eval `cat "$file"`) { - warn "couldn't parse $file: $@" if $@; - warn "couldn't do $file: $!" unless defined $return; - warn "couldn't run $file" unless $return; - } - } -} - =head1 NAME mmd2LaTeX.pl - utility script for MultiMarkdown to convert MultiMarkdown text diff --git a/bin/mmd2PDF.pl b/bin/mmd2PDF.pl index 8a8badd..8f98aac 100755 --- a/bin/mmd2PDF.pl +++ b/bin/mmd2PDF.pl @@ -24,15 +24,15 @@ use warnings; use File::Basename; +use File::Spec; use Cwd; use Cwd 'abs_path'; # Determine where MMD is installed. Use a "common installation" if available. -my $me = $0; # Where is this script located? +my $me = readlink(__FILE__) || __FILE__; # Where is this script located? my $MMDPath = LocateMMD($me); - # Determine whether we are in "file mode" or "stdin mode" my $count = @ARGV; @@ -79,84 +79,47 @@ sub LocateMMD { $me = dirname($me); - if ($os =~ /MSWin/) { - # We're running Windows - - # First check our directory to see if we're running inside MMD - - if ( -f "$me\\MultiMarkdown\\Support.pm") { - $MMDPath = "$me\\.."; - } - - # Next, look in user's home directory, then in common directories - if ($MMDPath eq "") { - if ( -d "$ENV{HOMEDRIVE}$ENV{HOMEPATH}\\MultiMarkdown") { - $MMDPath = "$ENV{HOMEDRIVE}$ENV{HOMEPATH}\\MultiMarkdown"; - } elsif ( -d "$ENV{HOMEDRIVE}\\Documents and Settings\\All Users\\MultiMarkdown") { - $MMDPath = "$ENV{HOMEDRIVE}\\Documents and Settings\\All Users\\MultiMarkdown"; - } - } + my @candidates = (); - # Load the MultiMarkdown::Support.pm module - do "$MMDPath\\bin\\MultiMarkdown\\Support.pm" if ($MMDPath ne ""); + if ( -f File::Spec->join($me, 'MultiMarkdown.pl') ) { + $MMDPath = dirname($me); } else { - # We're running Mac OS X or some *nix - - # First check our 
directory to see if we're running inside MMD - - if ( -f "$me/MultiMarkdown/Support.pm") { - $MMDPath = "$me/.."; - } - # Next, look in user's home directory, then in common directories - if ($MMDPath eq "") { + if ($os =~ /MSWin/) { + # We're running Windows + push @candidates, "$ENV{HOMEDRIVE}$ENV{HOMEPATH}\\MultiMarkdown"; + push @candidates, "$ENV{HOMEDRIVE}\\Documents and Settings\\All Users\\MultiMarkdown"; + + } else { + # We're running Mac OS X or some *nix if (defined($ENV{HOME})) { - if ( -d "$ENV{HOME}/Library/Application Support/MultiMarkdown") { - $MMDPath = "$ENV{HOME}/Library/Application Support/MultiMarkdown"; - } elsif ( -d "$ENV{HOME}/.multimarkdown") { - $MMDPath = "$ENV{HOME}/.multimarkdown"; - } - } - if ($MMDPath eq "") { - if ( -d "/Library/Application Support/MultiMarkdown") { - $MMDPath = "/Library/Application Support/MultiMarkdown"; - } elsif ( -d "/usr/share/multimarkdown") { - $MMDPath = "/usr/share/multimarkdown"; - } + push @candidates, "$ENV{HOME}/Library/Application Support/MultiMarkdown"; + push @candidates, "$ENV{HOME}/.multimarkdown"; + push @candidates, "/Library/Application Support/MultiMarkdown"; + push @candidates, "/usr/share/multimarkdown"; } } } + foreach (@candidates) { + if (-d $_) { + $MMDPath = $_; + last; + } + } + if ($MMDPath eq "") { die "You do not appear to have MultiMarkdown installed.\n"; } else { # Load the MultiMarkdown::Support.pm module $MMDPath = abs_path($MMDPath); - LoadModule("$MMDPath/bin/MultiMarkdown/Support.pm"); + unshift (@INC, File::Spec->join($MMDPath, 'lib')); + require MultiMarkdown::Support; } - # Clean up the path - $MMDPath = abs_path($MMDPath); - return $MMDPath; } -sub LoadModule { - my $file = shift; - my $os = $^O; # Mac = darwin; Linux = linux; Windows contains MSWin - - if ($os =~ /MSWin/) { - # Not sure what I can do here - } else { - unless (my $return = eval `cat "$file"`) { - warn "couldn't parse $file: $@" if $@; - warn "couldn't do $file: $!" 
unless defined $return; - warn "couldn't run $file" unless $return; - } - } -} - - =head1 NAME mmd2PDF - utility script for MultiMarkdown to convert MultiMarkdown text @@ -211,4 +174,4 @@ =head1 COPYRIGHT AND LICENSE 59 Temple Place, Suite 330 Boston, MA 02111-1307 USA -=cut \ No newline at end of file +=cut diff --git a/bin/mmd2PDFXeLaTeX.pl b/bin/mmd2PDFXeLaTeX.pl index 14877a4..b7ac504 100755 --- a/bin/mmd2PDFXeLaTeX.pl +++ b/bin/mmd2PDFXeLaTeX.pl @@ -24,15 +24,15 @@ use warnings; use File::Basename; +use File::Spec; use Cwd; use Cwd 'abs_path'; # Determine where MMD is installed. Use a "common installation" if available. -my $me = $0; # Where is this script located? +my $me = readlink(__FILE__) || __FILE__; # Where is this script located? my $MMDPath = LocateMMD($me); - # Determine whether we are in "file mode" or "stdin mode" my $count = @ARGV; @@ -79,83 +79,47 @@ sub LocateMMD { $me = dirname($me); - if ($os =~ /MSWin/) { - # We're running Windows - - # First check our directory to see if we're running inside MMD - - if ( -f "$me\\MultiMarkdown\\Support.pm") { - $MMDPath = "$me\\.."; - } - - # Next, look in user's home directory, then in common directories - if ($MMDPath eq "") { - if ( -d "$ENV{HOMEDRIVE}$ENV{HOMEPATH}\\MultiMarkdown") { - $MMDPath = "$ENV{HOMEDRIVE}$ENV{HOMEPATH}\\MultiMarkdown"; - } elsif ( -d "$ENV{HOMEDRIVE}\\Documents and Settings\\All Users\\MultiMarkdown") { - $MMDPath = "$ENV{HOMEDRIVE}\\Documents and Settings\\All Users\\MultiMarkdown"; - } - } + my @candidates = (); - # Load the MultiMarkdown::Support.pm module - do "$MMDPath\\bin\\MultiMarkdown\\Support.pm" if ($MMDPath ne ""); + if ( -f File::Spec->join($me, 'MultiMarkdown.pl') ) { + $MMDPath = dirname($me); } else { - # We're running Mac OS X or some *nix - - # First check our directory to see if we're running inside MMD - - if ( -f "$me/MultiMarkdown/Support.pm") { - $MMDPath = "$me/.."; - } - # Next, look in user's home directory, then in common directories - if ($MMDPath eq 
"") { + if ($os =~ /MSWin/) { + # We're running Windows + push @candidates, "$ENV{HOMEDRIVE}$ENV{HOMEPATH}\\MultiMarkdown"; + push @candidates, "$ENV{HOMEDRIVE}\\Documents and Settings\\All Users\\MultiMarkdown"; + + } else { + # We're running Mac OS X or some *nix if (defined($ENV{HOME})) { - if ( -d "$ENV{HOME}/Library/Application Support/MultiMarkdown") { - $MMDPath = "$ENV{HOME}/Library/Application Support/MultiMarkdown"; - } elsif ( -d "$ENV{HOME}/.multimarkdown") { - $MMDPath = "$ENV{HOME}/.multimarkdown"; - } - } - if ($MMDPath eq "") { - if ( -d "/Library/Application Support/MultiMarkdown") { - $MMDPath = "/Library/Application Support/MultiMarkdown"; - } elsif ( -d "/usr/share/multimarkdown") { - $MMDPath = "/usr/share/multimarkdown"; - } + push @candidates, "$ENV{HOME}/Library/Application Support/MultiMarkdown"; + push @candidates, "$ENV{HOME}/.multimarkdown"; + push @candidates, "/Library/Application Support/MultiMarkdown"; + push @candidates, "/usr/share/multimarkdown"; } } } + foreach (@candidates) { + if (-d $_) { + $MMDPath = $_; + last; + } + } + if ($MMDPath eq "") { die "You do not appear to have MultiMarkdown installed.\n"; } else { # Load the MultiMarkdown::Support.pm module $MMDPath = abs_path($MMDPath); - LoadModule("$MMDPath/bin/MultiMarkdown/Support.pm"); + unshift (@INC, File::Spec->join($MMDPath, 'lib')); + require MultiMarkdown::Support; } - # Clean up the path - $MMDPath = abs_path($MMDPath); - return $MMDPath; } -sub LoadModule { - my $file = shift; - my $os = $^O; # Mac = darwin; Linux = linux; Windows contains MSWin - - if ($os =~ /MSWin/) { - # Not sure what I can do here - } else { - unless (my $return = eval `cat "$file"`) { - warn "couldn't parse $file: $@" if $@; - warn "couldn't do $file: $!" 
unless defined $return; - warn "couldn't run $file" unless $return; - } - } -} - =head1 NAME mmd2PDFXeLaTeX - utility script for MultiMarkdown to convert MultiMarkdown @@ -210,4 +174,4 @@ =head1 COPYRIGHT AND LICENSE 59 Temple Place, Suite 330 Boston, MA 02111-1307 USA -=cut \ No newline at end of file +=cut diff --git a/bin/mmd2RTF.pl b/bin/mmd2RTF.pl index e64ad78..78946df 100755 --- a/bin/mmd2RTF.pl +++ b/bin/mmd2RTF.pl @@ -24,16 +24,15 @@ use warnings; use File::Basename; +use File::Spec; use Cwd; use Cwd 'abs_path'; - # Determine where MMD is installed. Use a "common installation" if available. -my $me = $0; # Where is this script located? +my $me = readlink(__FILE__) || __FILE__; # Where is this script located? my $MMDPath = LocateMMD($me); - # Determine whether we are in "file mode" or "stdin mode" my $count = @ARGV; @@ -84,83 +83,47 @@ sub LocateMMD { $me = dirname($me); - if ($os =~ /MSWin/) { - # We're running Windows - - # First check our directory to see if we're running inside MMD - - if ( -f "$me\\MultiMarkdown\\Support.pm") { - $MMDPath = "$me\\.."; - } - - # Next, look in user's home directory, then in common directories - if ($MMDPath eq "") { - if ( -d "$ENV{HOMEDRIVE}$ENV{HOMEPATH}\\MultiMarkdown") { - $MMDPath = "$ENV{HOMEDRIVE}$ENV{HOMEPATH}\\MultiMarkdown"; - } elsif ( -d "$ENV{HOMEDRIVE}\\Documents and Settings\\All Users\\MultiMarkdown") { - $MMDPath = "$ENV{HOMEDRIVE}\\Documents and Settings\\All Users\\MultiMarkdown"; - } - } + my @candidates = (); - # Load the MultiMarkdown::Support.pm module - do "$MMDPath\\bin\\MultiMarkdown\\Support.pm" if ($MMDPath ne ""); + if ( -f File::Spec->join($me, 'MultiMarkdown.pl') ) { + $MMDPath = dirname($me); } else { - # We're running Mac OS X or some *nix - - # First check our directory to see if we're running inside MMD - - if ( -f "$me/MultiMarkdown/Support.pm") { - $MMDPath = "$me/.."; - } - # Next, look in user's home directory, then in common directories - if ($MMDPath eq "") { + if ($os =~ /MSWin/) 
{ + # We're running Windows + push @candidates, "$ENV{HOMEDRIVE}$ENV{HOMEPATH}\\MultiMarkdown"; + push @candidates, "$ENV{HOMEDRIVE}\\Documents and Settings\\All Users\\MultiMarkdown"; + + } else { + # We're running Mac OS X or some *nix if (defined($ENV{HOME})) { - if ( -d "$ENV{HOME}/Library/Application Support/MultiMarkdown") { - $MMDPath = "$ENV{HOME}/Library/Application Support/MultiMarkdown"; - } elsif ( -d "$ENV{HOME}/.multimarkdown") { - $MMDPath = "$ENV{HOME}/.multimarkdown"; - } - } - if ($MMDPath eq "") { - if ( -d "/Library/Application Support/MultiMarkdown") { - $MMDPath = "/Library/Application Support/MultiMarkdown"; - } elsif ( -d "/usr/share/multimarkdown") { - $MMDPath = "/usr/share/multimarkdown"; - } + push @candidates, "$ENV{HOME}/Library/Application Support/MultiMarkdown"; + push @candidates, "$ENV{HOME}/.multimarkdown"; + push @candidates, "/Library/Application Support/MultiMarkdown"; + push @candidates, "/usr/share/multimarkdown"; } } } + foreach (@candidates) { + if (-d $_) { + $MMDPath = $_; + last; + } + } + if ($MMDPath eq "") { die "You do not appear to have MultiMarkdown installed.\n"; } else { # Load the MultiMarkdown::Support.pm module $MMDPath = abs_path($MMDPath); - LoadModule("$MMDPath/bin/MultiMarkdown/Support.pm"); + unshift (@INC, File::Spec->join($MMDPath, 'lib')); + require MultiMarkdown::Support; } - # Clean up the path - $MMDPath = abs_path($MMDPath); - return $MMDPath; } -sub LoadModule { - my $file = shift; - my $os = $^O; # Mac = darwin; Linux = linux; Windows contains MSWin - - if ($os =~ /MSWin/) { - # Not sure what I can do here - } else { - unless (my $return = eval `cat "$file"`) { - warn "couldn't parse $file: $@" if $@; - warn "couldn't do $file: $!" 
unless defined $return; - warn "couldn't run $file" unless $return; - } - } -} - =head1 NAME mmd2XHTML - utility script for MultiMarkdown to convert MultiMarkdown text @@ -215,4 +178,4 @@ =head1 COPYRIGHT AND LICENSE 59 Temple Place, Suite 330 Boston, MA 02111-1307 USA -=cut \ No newline at end of file +=cut diff --git a/bin/mmd2XHTML.pl b/bin/mmd2XHTML.pl index 79af285..0751b07 100755 --- a/bin/mmd2XHTML.pl +++ b/bin/mmd2XHTML.pl @@ -24,16 +24,15 @@ use warnings; use File::Basename; +use File::Spec; use Cwd; use Cwd 'abs_path'; - # Determine where MMD is installed. Use a "common installation" if available. -my $me = $0; # Where is this script located? +my $me = readlink(__FILE__) || __FILE__; # Where is this script located? my $MMDPath = LocateMMD($me); - # Determine whether we are in "file mode" or "stdin mode" my $count = @ARGV; @@ -84,83 +83,47 @@ sub LocateMMD { $me = dirname($me); - if ($os =~ /MSWin/) { - # We're running Windows - - # First check our directory to see if we're running inside MMD - - if ( -f "$me\\MultiMarkdown\\Support.pm") { - $MMDPath = "$me\\.."; - } - - # Next, look in user's home directory, then in common directories - if ($MMDPath eq "") { - if ( -d "$ENV{HOMEDRIVE}$ENV{HOMEPATH}\\MultiMarkdown") { - $MMDPath = "$ENV{HOMEDRIVE}$ENV{HOMEPATH}\\MultiMarkdown"; - } elsif ( -d "$ENV{HOMEDRIVE}\\Documents and Settings\\All Users\\MultiMarkdown") { - $MMDPath = "$ENV{HOMEDRIVE}\\Documents and Settings\\All Users\\MultiMarkdown"; - } - } + my @candidates = (); - # Load the MultiMarkdown::Support.pm module - do "$MMDPath\\bin\\MultiMarkdown\\Support.pm" if ($MMDPath ne ""); + if ( -f File::Spec->join($me, 'MultiMarkdown.pl') ) { + $MMDPath = dirname($me); } else { - # We're running Mac OS X or some *nix - - # First check our directory to see if we're running inside MMD - - if ( -f "$me/MultiMarkdown/Support.pm") { - $MMDPath = "$me/.."; - } - # Next, look in user's home directory, then in common directories - if ($MMDPath eq "") { + if ($os =~ 
/MSWin/) { + # We're running Windows + push @candidates, "$ENV{HOMEDRIVE}$ENV{HOMEPATH}\\MultiMarkdown"; + push @candidates, "$ENV{HOMEDRIVE}\\Documents and Settings\\All Users\\MultiMarkdown"; + + } else { + # We're running Mac OS X or some *nix if (defined($ENV{HOME})) { - if ( -d "$ENV{HOME}/Library/Application Support/MultiMarkdown") { - $MMDPath = "$ENV{HOME}/Library/Application Support/MultiMarkdown"; - } elsif ( -d "$ENV{HOME}/.multimarkdown") { - $MMDPath = "$ENV{HOME}/.multimarkdown"; - } - } - if ($MMDPath eq "") { - if ( -d "/Library/Application Support/MultiMarkdown") { - $MMDPath = "/Library/Application Support/MultiMarkdown"; - } elsif ( -d "/usr/share/multimarkdown") { - $MMDPath = "/usr/share/multimarkdown"; - } + push @candidates, "$ENV{HOME}/Library/Application Support/MultiMarkdown"; + push @candidates, "$ENV{HOME}/.multimarkdown"; + push @candidates, "/Library/Application Support/MultiMarkdown"; + push @candidates, "/usr/share/multimarkdown"; } } } + foreach (@candidates) { + if (-d $_) { + $MMDPath = $_; + last; + } + } + if ($MMDPath eq "") { die "You do not appear to have MultiMarkdown installed.\n"; } else { # Load the MultiMarkdown::Support.pm module $MMDPath = abs_path($MMDPath); - LoadModule("$MMDPath/bin/MultiMarkdown/Support.pm"); + unshift (@INC, File::Spec->join($MMDPath, 'lib')); + require MultiMarkdown::Support; } - # Clean up the path - $MMDPath = abs_path($MMDPath); - return $MMDPath; } -sub LoadModule { - my $file = shift; - my $os = $^O; # Mac = darwin; Linux = linux; Windows contains MSWin - - if ($os =~ /MSWin/) { - # Not sure what I can do here - } else { - unless (my $return = eval `cat "$file"`) { - warn "couldn't parse $file: $@" if $@; - warn "couldn't do $file: $!" 
unless defined $return; - warn "couldn't run $file" unless $return; - } - } -} - =head1 NAME mmd2XHTML - utility script for MultiMarkdown to convert MultiMarkdown text @@ -215,4 +178,4 @@ =head1 COPYRIGHT AND LICENSE 59 Temple Place, Suite 330 Boston, MA 02111-1307 USA -=cut \ No newline at end of file +=cut diff --git a/bin/mmd2letter.pl b/bin/mmd2letter.pl index 3e2e606..146838f 100755 --- a/bin/mmd2letter.pl +++ b/bin/mmd2letter.pl @@ -25,15 +25,15 @@ use warnings; use File::Basename; +use File::Spec; use Cwd; use Cwd 'abs_path'; # Determine where MMD is installed. Use a "common installation" if available. -my $me = $0; # Where is this script located? +my $me = readlink(__FILE__) || __FILE__; # Where is this script located? my $MMDPath = LocateMMD($me); - # Determine whether we are in "file mode" or "stdin mode" my $count = @ARGV; @@ -90,83 +90,47 @@ sub LocateMMD { $me = dirname($me); - if ($os =~ /MSWin/) { - # We're running Windows - - # First check our directory to see if we're running inside MMD - - if ( -f "$me\\MultiMarkdown\\Support.pm") { - $MMDPath = "$me\\.."; - } - - # Next, look in user's home directory, then in common directories - if ($MMDPath eq "") { - if ( -d "$ENV{HOMEDRIVE}$ENV{HOMEPATH}\\MultiMarkdown") { - $MMDPath = "$ENV{HOMEDRIVE}$ENV{HOMEPATH}\\MultiMarkdown"; - } elsif ( -d "$ENV{HOMEDRIVE}\\Documents and Settings\\All Users\\MultiMarkdown") { - $MMDPath = "$ENV{HOMEDRIVE}\\Documents and Settings\\All Users\\MultiMarkdown"; - } - } + my @candidates = (); - # Load the MultiMarkdown::Support.pm module - do "$MMDPath\\bin\\MultiMarkdown\\Support.pm" if ($MMDPath ne ""); + if ( -f File::Spec->join($me, 'MultiMarkdown.pl') ) { + $MMDPath = dirname($me); } else { - # We're running Mac OS X or some *nix - - # First check our directory to see if we're running inside MMD - - if ( -f "$me/MultiMarkdown/Support.pm") { - $MMDPath = "$me/.."; - } - # Next, look in user's home directory, then in common directories - if ($MMDPath eq "") { + if ($os 
=~ /MSWin/) { + # We're running Windows + push @candidates, "$ENV{HOMEDRIVE}$ENV{HOMEPATH}\\MultiMarkdown"; + push @candidates, "$ENV{HOMEDRIVE}\\Documents and Settings\\All Users\\MultiMarkdown"; + + } else { + # We're running Mac OS X or some *nix if (defined($ENV{HOME})) { - if ( -d "$ENV{HOME}/Library/Application Support/MultiMarkdown") { - $MMDPath = "$ENV{HOME}/Library/Application Support/MultiMarkdown"; - } elsif ( -d "$ENV{HOME}/.multimarkdown") { - $MMDPath = "$ENV{HOME}/.multimarkdown"; - } - } - if ($MMDPath eq "") { - if ( -d "/Library/Application Support/MultiMarkdown") { - $MMDPath = "/Library/Application Support/MultiMarkdown"; - } elsif ( -d "/usr/share/multimarkdown") { - $MMDPath = "/usr/share/multimarkdown"; - } + push @candidates, "$ENV{HOME}/Library/Application Support/MultiMarkdown"; + push @candidates, "$ENV{HOME}/.multimarkdown"; + push @candidates, "/Library/Application Support/MultiMarkdown"; + push @candidates, "/usr/share/multimarkdown"; } } } + foreach (@candidates) { + if (-d $_) { + $MMDPath = $_; + last; + } + } + if ($MMDPath eq "") { die "You do not appear to have MultiMarkdown installed.\n"; } else { # Load the MultiMarkdown::Support.pm module $MMDPath = abs_path($MMDPath); - LoadModule("$MMDPath/bin/MultiMarkdown/Support.pm"); + unshift (@INC, File::Spec->join($MMDPath, 'lib')); + require MultiMarkdown::Support; } - # Clean up the path - $MMDPath = abs_path($MMDPath); - return $MMDPath; } -sub LoadModule { - my $file = shift; - my $os = $^O; # Mac = darwin; Linux = linux; Windows contains MSWin - - if ($os =~ /MSWin/) { - # Not sure what I can do here - } else { - unless (my $return = eval `cat "$file"`) { - warn "couldn't parse $file: $@" if $@; - warn "couldn't do $file: $!" 
unless defined $return; - warn "couldn't run $file" unless $return; - } - } -} - =head1 NAME mmd2letter - utility script for MultiMarkdown to convert MultiMarkdown text @@ -226,4 +190,4 @@ =head1 COPYRIGHT AND LICENSE 59 Temple Place, Suite 330 Boston, MA 02111-1307 USA -=cut \ No newline at end of file +=cut diff --git a/bin/mmd2web.pl b/bin/mmd2web.pl index ec540c2..534570f 100755 --- a/bin/mmd2web.pl +++ b/bin/mmd2web.pl @@ -27,16 +27,15 @@ use warnings; use File::Basename; +use File::Spec; use Cwd; use Cwd 'abs_path'; - # Determine where MMD is installed. Use a "common installation" if available. -my $me = $0; # Where is this script located? +my $me = readlink(__FILE__) || __FILE__; # Where is this script located? my $MMDPath = LocateMMD($me); - # Determine whether we are in "file mode" or "stdin mode" my $count = @ARGV; @@ -89,83 +88,47 @@ sub LocateMMD { $me = dirname($me); - if ($os =~ /MSWin/) { - # We're running Windows - - # First check our directory to see if we're running inside MMD - - if ( -f "$me\\MultiMarkdown\\Support.pm") { - $MMDPath = "$me\\.."; - } - - # Next, look in user's home directory, then in common directories - if ($MMDPath eq "") { - if ( -d "$ENV{HOMEDRIVE}$ENV{HOMEPATH}\\MultiMarkdown") { - $MMDPath = "$ENV{HOMEDRIVE}$ENV{HOMEPATH}\\MultiMarkdown"; - } elsif ( -d "$ENV{HOMEDRIVE}\\Documents and Settings\\All Users\\MultiMarkdown") { - $MMDPath = "$ENV{HOMEDRIVE}\\Documents and Settings\\All Users\\MultiMarkdown"; - } - } + my @candidates = (); - # Load the MultiMarkdown::Support.pm module - do "$MMDPath\\bin\\MultiMarkdown\\Support.pm" if ($MMDPath ne ""); + if ( -f File::Spec->join($me, 'MultiMarkdown.pl') ) { + $MMDPath = dirname($me); } else { - # We're running Mac OS X or some *nix - - # First check our directory to see if we're running inside MMD - - if ( -f "$me/MultiMarkdown/Support.pm") { - $MMDPath = "$me/.."; - } - # Next, look in user's home directory, then in common directories - if ($MMDPath eq "") { + if ($os =~ 
/MSWin/) { + # We're running Windows + push @candidates, "$ENV{HOMEDRIVE}$ENV{HOMEPATH}\\MultiMarkdown"; + push @candidates, "$ENV{HOMEDRIVE}\\Documents and Settings\\All Users\\MultiMarkdown"; + + } else { + # We're running Mac OS X or some *nix if (defined($ENV{HOME})) { - if ( -d "$ENV{HOME}/Library/Application Support/MultiMarkdown") { - $MMDPath = "$ENV{HOME}/Library/Application Support/MultiMarkdown"; - } elsif ( -d "$ENV{HOME}/.multimarkdown") { - $MMDPath = "$ENV{HOME}/.multimarkdown"; - } - } - if ($MMDPath eq "") { - if ( -d "/Library/Application Support/MultiMarkdown") { - $MMDPath = "/Library/Application Support/MultiMarkdown"; - } elsif ( -d "/usr/share/multimarkdown") { - $MMDPath = "/usr/share/multimarkdown"; - } + push @candidates, "$ENV{HOME}/Library/Application Support/MultiMarkdown"; + push @candidates, "$ENV{HOME}/.multimarkdown"; + push @candidates, "/Library/Application Support/MultiMarkdown"; + push @candidates, "/usr/share/multimarkdown"; } } } + foreach (@candidates) { + if (-d $_) { + $MMDPath = $_; + last; + } + } + if ($MMDPath eq "") { die "You do not appear to have MultiMarkdown installed.\n"; } else { # Load the MultiMarkdown::Support.pm module $MMDPath = abs_path($MMDPath); - LoadModule("$MMDPath/bin/MultiMarkdown/Support.pm"); + unshift (@INC, File::Spec->join($MMDPath, 'lib')); + require MultiMarkdown::Support; } - # Clean up the path - $MMDPath = abs_path($MMDPath); - return $MMDPath; } -sub LoadModule { - my $file = shift; - my $os = $^O; # Mac = darwin; Linux = linux; Windows contains MSWin - - if ($os =~ /MSWin/) { - # Not sure what I can do here - } else { - unless (my $return = eval `cat "$file"`) { - warn "couldn't parse $file: $@" if $@; - warn "couldn't do $file: $!" 
unless defined $return; - warn "couldn't run $file" unless $return; - } - } -} - =head1 NAME mmd2XHTML - utility script for MultiMarkdown to convert MultiMarkdown text @@ -220,4 +183,4 @@ =head1 COPYRIGHT AND LICENSE 59 Temple Place, Suite 330 Boston, MA 02111-1307 USA -=cut \ No newline at end of file +=cut diff --git a/lib/MultiMarkdown.pm b/lib/MultiMarkdown.pm new file mode 100644 index 0000000..4a20df3 --- /dev/null +++ b/lib/MultiMarkdown.pm @@ -0,0 +1,2751 @@ +# MultiMarkdown -- A modification of John Gruber's original Markdown +# that adds new features and an output format that can more readily +# be converted into other document formats +# +# $Id: MultiMarkdown.pl 525 2009-06-15 18:45:44Z fletcher $ +# +# Original Code Copyright (c) 2004-2007 John Gruber +# +# +# MultiMarkdown changes Copyright (c) 2005-2009 Fletcher T. Penney +# +# +# MultiMarkdown Version 2.0.b6 +# +# Based on Markdown.pl 1.0.2b8 - Wed 09 May 2007 +# +# +# TODO: Still need to get the glossary working in non-memoir documents +# TODO: A mechanism to include arbitrary code (LaTeX, etc) without being "ugly" +# TODO: Look into discussion re: assigning classes to div's/span's on Markdown list. +# TODO: Improve support for tables with long items and overall width in LaTeX +# TODO: Need a syntax for long table cells in MMD, even if no rowspan feature yet +# TODO: Create utilities to convert MMD tables to/from tab-delimited + + +package MultiMarkdown; +require 5.006_000; +use strict; +use warnings; + +use File::Basename; +use File::Spec; +use Digest::MD5 qw(md5_hex); +use Carp qw(croak); +use base 'Exporter'; + +our $VERSION = '2.0.b6'; +our @EXPORT_OK = qw{markdown}; + +require Text::ASCIIMathML; + +our $mathParser = new Text::ASCIIMathML(); + +## Disabled; causes problems under Perl 5.6.1: +# use utf8; +# binmode( STDOUT, ":utf8" ); # c.f.: http://acis.openlib.org/dev/perl-unicode-struggle.html + +# +# Globals: +# + +# Reusable patterns to match balanced [brackets] and (parens). 
See +# Friedl's "Mastering Regular Expressions", 2nd Ed., pp. 328-331. +our ($g_nested_brackets, $g_nested_parens); +$g_nested_brackets = qr{ + (?> # Atomic matching + [^\[\]]+ # Anything other than brackets + | + \[ + (??{ $g_nested_brackets }) # Recursive set of nested brackets + \] + )* +}x; + +# Doesn't allow for whitespace, because we're using it to match URLs: +$g_nested_parens = qr{ + (?> # Atomic matching + [^()\s]+ # Anything other than parens or whitespace + | + \( + (??{ $g_nested_parens }) # Recursive set of nested brackets + \) + )* +}x; + + +# Table of hash values for escaped characters: +our %g_escape_table; +foreach my $char (split //, '\\`*_{}[]()>#+-.!') { + $g_escape_table{$char} = md5_hex($char); +} + +# +# Global default settings: +# + +our %g_default_settings = ( + allow_mathml => 1, + base_header_level => 1, + base_url => "", + bibliography_title => "Bibliography", + document_format => "", + empty_element_suffix => " />", + heading_ids => 1, + img_ids => 1, + tab_width => 4, + use_metadata => 1, + # WikiWords and [[Wiki Links]] are not supported anymore + use_wikilinks => 0, + codeblocks_newline => '', + running_blockquotes => 1, + running_lists => undef, +); + +=head1 NAME + +MultiMarkdown - Convert MultiMarkdown syntax to (X)HTML + +=head1 SYNOPSIS + + use MultiMarkdown 'markdown'; + my $html = markdown($text); + + use MultiMarkdown 'markdown'; + my $html = markdown( $text, { + empty_element_suffix => '>', + tab_width => 2, + use_wikilinks => 1, + } ); + + use MultiMarkdown; + my $m = MultiMarkdown->new; + my $html = $m->markdown($text); + + use MultiMarkdown; + my $m = MultiMarkdown->new( + empty_element_suffix => '>', + tab_width => 2, + use_wikilinks => 1, + ); + my $html = $m->markdown( $text ); + +=head1 DESCRIPTION + +Markdown is a text-to-HTML filter; it translates an easy-to-read / +easy-to-write structured text format into HTML. 
Markdown's text format +is most similar to that of plain text email, and supports features such +as headers, *emphasis*, code blocks, blockquotes, and links. + +Markdown's syntax is designed not as a generic markup language, but +specifically to serve as a front-end to (X)HTML. You can use span-level +HTML tags anywhere in a Markdown document, and you can use block level +HTML tags (C<<
    >>, C<< >> etc.). Note that by default +Markdown isn't interpreted in HTML block-level elements, unless you add +a C<markdown="1"> attribute to the element. See L for +details. + +This module implements the MultiMarkdown markdown syntax extensions from: + + http://fletcherpenney.net/multimarkdown/ + +=head1 SYNTAX + +For more information about (original) Markdown's syntax, see: + + http://daringfireball.net/projects/markdown/ + +This module implements MultiMarkdown, which is an extension to Markdown. + +The extension is documented at: + + http://fletcherpenney.net/multimarkdown/ + +and borrows from php-markdown, which lives at: + + http://michelf.com/projects/php-markdown/extra/ + +This documentation is going to be moved/copied into this module for clearer reading in a future release. + +=head1 OPTIONS + +MultiMarkdown supports a number of options to its processor which control the behaviour of the output document. + +These options can be supplied to the constructor, or in a hash with the individual calls to the markdown method. +See the synopsis for examples of both of the above styles. + +The options for the processor are: + +=over + +=item use_metadata + +Controls the metadata options below. + +=item strip_metadata + +If true, any metadata in the input document is removed from the output document (note - does not take effect in complete document format). + +=item empty_element_suffix + +This option can be used to generate normal HTML output. By default, it is ' />', which is XHTML; change to '>' for normal HTML. + +=item img_ids + +Controls if <img> tags generated have an id attribute. Defaults to true. +Turn off for compatibility with the original markdown. + +=item heading_ids + +Controls if heading tags (<h1> to <h6>) generated have an id attribute. Defaults to true. +Turn off for compatibility with the original markdown. + +=item bibliography_title + +The title of the generated bibliography, defaults to 'Bibliography'. 
+ +=item tab_width + +Controls indent width in the generated markup, defaults to 4 + +=item disable_tables + +If true, this disables the MultiMarkdown table handling. + +=item disable_footnotes + +If true, this disables the MultiMarkdown footnotes handling. + +=item disable_bibliography + +If true, this disables the MultiMarkdown bibliography/citation handling. + +=back + +A number of possible items of metadata can also be supplied as options. +Note that if use_metadata is true then the metadata in the document will overwrite the settings on the command line. + +Metadata options supported are: + +=over + +=item document_format + +=item use_wikilinks + +=item base_url + +=back + +=head1 METADATA + +MultiMarkdown supports the concept of 'metadata', which allows you to specify a number of formatting options +within the document itself. Metadata should be placed in the top few lines of a file, one value per line, as colon-separated key/value pairs. +The metadata should be separated from the document with a blank line. + +Most metadata keys are also supported as options to the constructor, or options +to the markdown method itself. (Note: as metadata, keys contain spaces, whereas as options the keys are underscore-separated.) + +You can attach arbitrary metadata to a document, which is output in HTML tags if unknown, see t/11document_format.t for more info. + +The 'known' metadata keys and their effects are listed below: + +=over + +=item document format + +If set to 'complete', MultiMarkdown will render an entire xHTML page, otherwise it will render a document fragment. + +=over + +=item css + +Sets a CSS file for the file, if in 'complete' document format. + +=item title + +Sets the page title, if in 'complete' document format. + +=back + +=item use wikilinks + +If set to '1' or 'on', causes links that are WikiWords to automatically be processed into links. + +=item base url + +This is the base URL for referencing wiki pages. 
If this is not supplied, all wiki links are relative. + +=back + +=head1 METHODS + +=head2 new + +A simple constructor, see the SYNTAX and OPTIONS sections for more information. + +=cut + +sub new { + my ($class, %params) = @_; + + my %p = %g_default_settings; + foreach (keys %params) { + $p{$_} = $params{$_}; + } + + if ($p{use_wikilinks}) { + croak('Sorry, WikiLinks are not supported in this version of ' . __PACKAGE__); + } + + # The original Markdown implementation supports "running blockquotes": if + # any line in a paragraph starts with the '>' character, that line and all + # the subsequent ones are split from the paragraph and become a + # blockquote. This is inconsistent with the list behavior (lists don't + # start mid-paragraph). + # + # Additionally, if a blockquote happens within a non-block list item (e.g. + # a standalone item or an item in a sequence of items not separated by + # empty lines), mismatched markup is generated, with interleaved + # 'blockquote' and 'li' tag pairs because Markdown starts thinking it's in + # span mode, and then reparses the span-mode output in block mode. + # + # Blockquote-in-list detection is solved by letting the list item + # processor check for existence of >-starting lines in the whole item. + # Since this is inefficient, we allow the user to disable running + # blockquotes, in which case blockquotes cannot start mid-paragraph + # (consistently with the list behavior) and the blockquote-in-list + # detection is much more efficient. + + if ($p{running_blockquotes}) { + $p{_blockquote_lead} = ''; + $p{_list_blockquote_pattern} = qr/^[ \t]*>/m; + } else { + $p{_blockquote_lead} = qr/(?:(?<=\n\n)|\A\n?)/; + $p{_list_blockquote_pattern} = qr/\A>/; + } + + # As an extension to Markdown, we also support running lists, i.e. + # lists that can start in the middle of a paragraph. 
running_lists is + # undef by default, and when defined it should be set to the regexp + # that must match at the end of the preceding line to allow a running + # list (typically something like ':' would be used); + + if (defined $p{running_lists}) { + $p{_list_lead} = qr/(?:(?<=\n\n|$p{running_lists}\n)|\A\n?)/; + } else { + $p{_list_lead} = qr/(?:(?<=\n\n)|\A\n?)/; + } + + my $self = { params => \%p }; + bless $self, ref($class) || $class; + return $self; +} + +=head2 markdown + +The main function as far as the outside world is concerned. See the SYNOPSIS +for details on use. + +=cut + +sub markdown { + my ( $self, $text, $options ) = @_; + + # Detect functional mode, and create an instance for this run.. + unless (ref $self) { + if ( $self ne __PACKAGE__ ) { + my $ob = __PACKAGE__->new(); + # $self is text, $text is options + return $ob->markdown($self, $text); + } + else { + croak('Calling ' . $self . '->markdown (as a class method) is not supported.'); + } + } + + $options ||= {}; + + %$self = (%{ $self->{params} }, %$options, params => $self->{params}); + + $self->_CleanUpRunData($options); + + return $self->_Markdown($text); +} + +sub _CleanUpRunData { + my ($self, $options) = @_; + # Clear the global hashes. If we don't clear these, you get conflicts + # from other articles when generating a page which contains more than + # one article (e.g. 
an index page that shows the N most recent + # articles): + $self->{_urls} = {}; + $self->{_titles} = {}; + $self->{_html_blocks} = {}; + + $self->{_metadata} = {}; + $self->{_metadata_newline} = { + 'default' => "\n", + 'keywords' => ", ", + }; + $self->{_crossrefs} = {}; + $self->{_footnotes} = {}; + $self->{_attributes} = {}; + $self->{_used_footnotes} = {}; + $self->{_footnote_counter} = 0; + $self->{_used_references} = []; + $self->{_citation_counter} = 0; + $self->{_references} = {}; + +# Used to track when we're inside an ordered or unordered list +# (see _ProcessListItems() for details): + $self->{_list_level} = 0; +} + + +sub _Markdown { +# +# Main function. The order in which other subs are called here is +# essential. Link and image substitutions need to happen before +# _EscapeSpecialCharsWithinTagAttributes(), so that any *'s or _'s in the +# and tags get encoded. +# + my ($self, $text) = @_; + + $text = $self->_CleanUpDoc($text); + + # Strip out MetaData + $text = $self->_ParseMetaData($text) if ($self->{use_metadata} || $self->{strip_metadata}); + + # And recheck for leading blank lines + $text =~ s/^\n+//s; + + # Turn block-level HTML blocks into hash entries + $text = $self->_HashHTMLBlocks($text); + + # Strip footnote and link definitions, store in hashes. 
+ $text = $self->_StripFootnoteDefinitions($text) unless $self->{disable_footnotes}; + + $text = $self->_StripLinkDefinitions($text); + + $self->_GenerateImageCrossRefs($text); + + $text = $self->_StripMarkdownReferences($text); + + $text = $self->_RunBlockGamut($text); + + $text = $self->_DoMarkdownCitations($text) unless $self->{disable_bibliography}; + $text = $self->_DoFootnotes($text) unless $self->{disable_footnotes}; + + $text = _UnescapeSpecialChars($text); + + # Clean encoding within HTML comments + $text = $self->_UnescapeComments($text); + + $text = $self->_FixFootnoteParagraphs($text) unless $self->{disable_footnotes}; + $text .= $self->_PrintFootnotes() unless $self->{disable_footnotes}; + $text .= $self->_PrintMarkdownBibliography() unless $self->{disable_bibliography}; + + $text = _ConvertCopyright($text); + + if (lc($self->{document_format}) =~ /^complete\s*$/i) { + return $self->_xhtmlMetaData() . "\n\n" . $text . "\n\n"; + } elsif (lc($self->{document_format}) =~ /^snippet\s*$/i) { + return $text . "\n"; + } else { + return $self->{document_format} . $self->_textMetaData() . $text . "\n"; + } + +} + +sub _CleanUpDoc { + my ($self, $text) = @_; + + # Standardize line endings: + $text =~ s{\r\n}{\n}g; # DOS to Unix + $text =~ s{\r}{\n}g; # Mac to Unix + + # Make sure $text ends with a couple of newlines: + $text .= "\n\n"; + + # Convert all tabs to spaces. + $text = $self->_Detab($text); + + # Strip any lines consisting only of spaces and tabs. + # This makes subsequent regexen easier to write, because we can + # match consecutive blank lines with /\n+/ instead of something + # contorted like /[ \t]*\n+/ . + $text =~ s/^[ \t]+$//mg; + + return $text; +} + + +sub _StripLinkDefinitions { +# +# Strips link definitions from text, stores the URLs and titles in +# hash references. 
+# + my ($self, $text) = @_; + my $less_than_tab = $self->{tab_width} - 1; + + # Link defs are in the form: ^[id]: url "optional title" + while ($text =~ s{ + # Pattern altered for MultiMarkdown + # in order to not match citations or footnotes + ^[ ]{0,$less_than_tab}\[([^#^].*)\]: # id = $1 + [ \t]* + \n? # maybe *one* newline + [ \t]* + ? # url = $2 + [ \t]* + \n? # maybe one newline + [ \t]* + (?: + (?<=\s) # lookbehind for whitespace + ["(] + (.+?) # title = $3 + [")] + [ \t]* + )? # title is optional + + # MultiMarkdown addition for attribute support + \n? + ( # Attributes = $4 + (?<=\s) # lookbehind for whitespace + (([ \t]*\n)?[ \t]*((\S+=\S+)|(\S+=".*?")))* + )? + [ \t]* + # /addition + (?:\n+|\Z) + } + {}mx) { +# $self->{_urls}{lc $1} = _EncodeAmpsAndAngles( $2 ); # Link IDs are case-insensitive + $self->{_urls}{lc $1} = $2; # Link IDs are case-insensitive + if ($3) { + $self->{_titles}{lc $1} = $3; + $self->{_titles}{lc $1} =~ s/"/"/g; + } + + # MultiMarkdown addition " + if ($4) { + $self->{_attributes}{lc $1} = $4; + } + # /addition + } + + return $text; +} + +sub _StripHTML { + # Strip (X)HTML code from string + my $text = shift; + + $text =~ s/<.*?>//g; + + return $text; +} + +# Hashify HTML blocks: +# We only want to do this for block-level HTML tags, such as headers, +# lists, and tables. That's because we still want to wrap

    s around +# "paragraphs" that are wrapped in non-block-level tags, such as anchors, +# phrase emphasis, and spans. The list of tags we're looking for is +# hard-coded: +our $g_block_tags = qr{ + (?: + p | div | h[1-6] | blockquote | pre | table | + dl | ol | ul | script | noscript | form | + fieldset | iframe | ins | del | + # HTML5 + address | canvas | picture | figcaption | svg | math | + article | header | footer | section | aside | video | + output | hgroup | canvas | figure | main | nav | + details | summary | + # my own + switch + ) + }x; # MultiMarkdown does not include `math` in the above list so that + # Equations can optionally be included in separate paragraphs + +our $g_tag_attrs = qr{ + (?: # Match one attr name/value pair + \s+ # There needs to be at least some whitespace + # before each attribute name. + [\w.:_-]+ # Attribute name + \s*=\s* + (?: + ".+?" # "Attribute value" + | + '.+?' # 'Attribute value' + ) + )* # Zero or more + }x; + +our $g_empty_tag = qr{< \w+ $g_tag_attrs \s* />}xms; +our $g_open_tag = qr{< $g_block_tags $g_tag_attrs \s* >}xms; +our $g_close_tag = undef; # let Text::Balanced handle this + +use Text::Balanced qw(gen_extract_tagged); +our $g_extract_block = gen_extract_tagged($g_open_tag, $g_close_tag, undef, { ignore => [$g_empty_tag] }); + +sub _HashHTMLBlocks { + my ($self, $text) = @_; + my $less_than_tab = $self->{tab_width} - 1; + + if ($text =~ /<$g_block_tags/) { + my @chunks; + ## TO-DO: the 0,3 on the next line ought to respect the + ## tabwidth, or else, we should mandate 4-space tabwidth and + ## be done with it: + while ($text =~ s{^(([ ]{0,3}<)?.*\n)}{}m) { + my $cur_line = $1; + if (defined $2) { + # current line could be start of code block + + my ($tag, $remainder) = $g_extract_block->($cur_line . $text); + if ($tag) { + my $key = md5_hex($tag); + $self->{_html_blocks}{$key} = $tag; + push @chunks, "\n\n" . $key . 
"\n\n"; + $text = $remainder; + } + else { + # No tag match, so toss $cur_line into @chunks + push @chunks, $cur_line; + } + } + else { + # current line could NOT be start of code block + push @chunks, $cur_line; + } + + } + push @chunks, $text; # Whatever is left. + + $text = join '', @chunks; + } + + + + # Special case just for


    . It was easier to make a special case than + # to make the other regex more complicated. + $text =~ s{ + (?: + (?<=\n\n) # Starting after a blank line + | # or + \A\n? # the beginning of the doc + ) + ( # save in $1 + [ ]{0,$less_than_tab} + <(hr) # start tag = $2 + \b # word break + ([^<>])*? # + /?> # the matching end tag + [ \t]* + (?=\n{2,}|\Z) # followed by a blank line or end of document + ) + }{ + my $key = md5_hex($1); + $self->{_html_blocks}{$key} = $1; + "\n\n" . $key . "\n\n"; + }egx; + + # Special case for standalone HTML comments: + $text =~ s{ + (?: + (?<=\n\n) # Starting after a blank line + | # or + \A\n? # the beginning of the doc + ) + ( # save in $1 + [ ]{0,$less_than_tab} + (?s: + + ) + [ \t]* + (?=\n{2,}|\Z) # followed by a blank line or end of document + ) + }{ + my $key = md5_hex($1); + $self->{_html_blocks}{$key} = $1; + "\n\n" . $key . "\n\n"; + }egx; + + # PHP and ASP-style processor instructions ( and <%…%>) + $text =~ s{ + (?: + (?<=\n\n) # Starting after a blank line + | # or + \A\n? # the beginning of the doc + ) + ( # save in $1 + [ ]{0,$less_than_tab} + (?s: + <([?%]) # $2 + .*? + \2> + ) + [ \t]* + (?=\n{2,}|\Z) # followed by a blank line or end of document + ) + }{ + my $key = md5_hex($1); + $self->{_html_blocks}{$key} = $1; + "\n\n" . $key . "\n\n"; + }egx; + + + return $text; +} + + +sub _RunBlockGamut { +# +# These are all the transformations that form block-level +# tags like paragraphs, headers, and list items. +# + my ($self, $text) = @_; + + $text = $self->_DoHeaders($text); + + # Do tables first to populate the table id's for cross-refs + # Escape
     so we don't get greedy with tables
    +
    +	$text = $self->_DoTables($text) unless $self->{disable_tables};
    +
    +	# And now, protect our tables
    +	$text = $self->_HashHTMLBlocks($text);
    +
    +	# Do Horizontal Rules:
    +	$text =~ s{^[ ]{0,2}([ ]?\*[ ]?){3,}[ \t]*$}{\n{empty_element_suffix}\n}gmx;
    +	$text =~ s{^[ ]{0,2}([ ]? -[ ]?){3,}[ \t]*$}{\n{empty_element_suffix}\n}gmx;
    +	$text =~ s{^[ ]{0,2}([ ]? _[ ]?){3,}[ \t]*$}{\n{empty_element_suffix}\n}gmx;
    +
    +	$text = $self->_DoDefinitionLists($text);
    +	$text = $self->_DoLists($text);
    +	$text = $self->_DoCodeBlocks($text);
    +	$text = $self->_DoBlockQuotes($text);
    +
    +	# We already ran _HashHTMLBlocks() before, in Markdown(), but that
    +	# was to escape raw HTML in the original Markdown source. This time,
    +	# we're escaping the markup we've just created, so that we don't wrap
    +	# 
  • " . $item . "
  • \n"; + }egmx; + + $self->{_list_level}--; + return $list_str; +} + + + +sub _DoCodeBlocks { +# +# Process Markdown `
    ` blocks.
    +#
    +
    +	my ($self, $text) = @_;
    +
    +	$text =~ s{
    +			(?:\n\n|\A)
    +			(	            # $1 = the code block -- one or more lines, starting with a space/tab
    +			  (?:
    +			    (?:[ ]{$self->{tab_width}} | \t)  # Lines must start with a tab or a tab-width of spaces
    +			    .*\n+
    +			  )+
    +			)
    +			((?=^[ ]{0,$self->{tab_width}}\S)|\Z)	# Lookahead for non-space at line-start, or end of doc
    +		}{
    +			my $codeblock = $1;
    +			my $result; # return value
    +
    +			$codeblock = _EncodeCode($self->_Outdent($codeblock));
    +			$codeblock = $self->_Detab($codeblock);
    +			$codeblock =~ s/\A\n+//; # trim leading newlines
    +			$codeblock =~ s/\n+\z//; # trim trailing newlines
    +
    +			$result = "\n\n
    " . $codeblock . "$self->{codeblocks_newline}
    \n\n"; + + $result; + }egmx; + + return $text; +} + + +sub _DoCodeSpans { +# +# * Backtick quotes are used for spans. +# +# * You can use multiple backticks as the delimiters if you want to +# include literal backticks in the code span. So, this input: +# +# Just type ``foo `bar` baz`` at the prompt. +# +# Will translate to: +# +#

    Just type foo `bar` baz at the prompt.

    +# +# There's no arbitrary limit to the number of backticks you +# can use as delimters. If you need three consecutive backticks +# in your code, use four for delimiters, etc. +# +# * You can use spaces to get literal backticks at the edges: +# +# ... type `` `bar` `` ... +# +# Turns to: +# +# ... type `bar` ... +# + + my $text = shift; + + $text =~ s@ + (?$c
    "; + @egsx; + + return $text; +} + + +sub _EncodeCode { +# +# Encode/escape certain characters inside Markdown code runs. +# The point is that in code, these characters are literals, +# and lose their special Markdown meanings. +# + local $_ = shift; + + # Encode all ampersands; HTML entities are not + # entities within a Markdown code span. + s/&/&/g; + + # Encode $'s, but only if we're running under Blosxom. + # (Blosxom interpolates Perl variables in article bodies.) + { + no warnings 'once'; + if (defined($blosxom::version)) { + s/\$/$/g; + } + } + + + # Do the angle bracket song and dance: + s! < !<!gx; + s! > !>!gx; + + # Now, escape characters that are magic in Markdown: + s! \* !$g_escape_table{'*'}!gx; + s! _ !$g_escape_table{'_'}!gx; + s! { !$g_escape_table{'{'}!gx; + s! } !$g_escape_table{'}'}!gx; + s! \[ !$g_escape_table{'['}!gx; + s! \] !$g_escape_table{']'}!gx; + s! \\ !$g_escape_table{'\\'}!gx; + + return $_; +} + + +sub _DoItalicsAndBold { + my $text = shift; + + # Cave in - `*` and `_` behave differently... We'll see how it works out + + + # must go first: + $text =~ s{ (?$2}gsx; + + $text =~ s{ (?$2}gsx; + + # And now, a second pass to catch nested strong and emphasis special cases + $text =~ s{ (?$2}gsx; + + $text =~ s{ (?$2}gsx; + + # And now, allow `*` in the middle of words + + # must go first: + $text =~ s{ (\*\*) (?=\S) (.+?[*]*) (?<=\S) \1 } + {$2}gsx; + + $text =~ s{ (\*) (?=\S) (.+?) (?<=\S) \1 } + {$2}gsx; + + return $text; +} + + +sub _DoBlockQuotes { + my ($self, $text) = @_; + + $text =~ s{ + $self->{_blockquote_lead} + ( # Wrap whole match in $1 + ( + ^[ \t]*>[ \t]? # '>' at the start of a line + .+\n # rest of the first line + (.+\n)* # subsequent consecutive lines + \n* # blanks + )+ + ) + }{ + my $bq = $1; + $bq =~ s/^[ \t]*>[ \t]?//gm; # trim one level of quoting + $bq =~ s/^[ \t]+$//mg; # trim whitespace-only lines + $bq = $self->_RunBlockGamut($bq); # recurse + + $bq =~ s/^/ /mg; + # These leading spaces screw with
     content, so we need to fix that:
    +			$bq =~ s{
    +					(\s*
    .+?
    ) + }{ + my $pre = $1; + $pre =~ s/^ //mg; + $pre; + }egsx; + + "
    \n$bq\n
    \n\n"; + }egmx; + + + return $text; +} + + +sub _FormParagraphs { +# +# Params: +# $text - string to process with html

    tags +# + my ($self, $text) = @_; + + # Strip leading and trailing lines: + $text =~ s/\A\n+//; + $text =~ s/\n+\z//; + + my @grafs = split(/\n{2,}/, $text); + + # + # Wrap

    tags. + # + foreach (@grafs) { + unless (defined( $self->{_html_blocks}{$_} )) { + $_ = $self->_RunSpanGamut($_); + s/^([ \t]*)/

    /; + $_ .= "

    "; + } + } + + # + # Unhashify HTML blocks + # +# foreach my $graf (@grafs) { +# my $block = $self->{_html_blocks}{$graf}; +# if (defined $block) { +# $graf = $block; +# } +# } + + foreach my $graf (@grafs) { + # Modify elements of @grafs in-place... + my $block = $self->{_html_blocks}{$graf}; + if (defined $block) { + $graf = $block; + if ($block =~ m{ + \A + ( # $1 =
    tag +
    ]* + \b + markdown\s*=\s* (['"]) # $2 = attr quote char + 1 + \2 + [^>]* + > + ) + ( # $3 = contents + .* + ) + (
    ) # $4 = closing tag + \z + + }xms + ) { + my ($div_open, $div_content, $div_close) = ($1, $3, $4); + + # We can't call Markdown(), because that resets the hash; + # that initialization code should be pulled into its own sub, though. + $div_content = $self->_HashHTMLBlocks($div_content); + $div_content = $self->_StripLinkDefinitions($div_content); + $div_content = $self->_RunBlockGamut($div_content); + $div_content = _UnescapeSpecialChars($div_content); + + $div_open =~ s{\smarkdown\s*=\s*(['"]).+?\1}{}ms; + + $graf = $div_open . "\n" . $div_content . "\n" . $div_close; + } + } + } + + + return join "\n\n", @grafs; +} + + +sub _EncodeAmpsAndAngles { +# Smart processing for ampersands and angle brackets that need to be encoded. + + my $text = shift; + + # Ampersand-encoding based entirely on Nat Irons's Amputator MT plugin: + # http://bumppo.net/projects/amputator/ + $text =~ s/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/&/g; + + # Encode naked <'s + $text =~ s{<(?![a-z/?\$!])}{<}gi; + + return $text; +} + + +sub _EncodeBackslashEscapes { +# +# Parameter: String. +# Returns: The string, with after processing the following backslash +# escape sequences. +# + local $_ = shift; + + s! \\\\ !$g_escape_table{'\\'}!gx; # Must process escaped backslashes first. + s! \\` !$g_escape_table{'`'}!gx; + s! \\\* !$g_escape_table{'*'}!gx; + s! \\_ !$g_escape_table{'_'}!gx; + s! \\\{ !$g_escape_table{'{'}!gx; + s! \\\} !$g_escape_table{'}'}!gx; + s! \\\[ !$g_escape_table{'['}!gx; + s! \\\] !$g_escape_table{']'}!gx; + s! \\\( !$g_escape_table{'('}!gx; + s! \\\) !$g_escape_table{')'}!gx; + s! \\> !$g_escape_table{'>'}!gx; + s! \\\# !$g_escape_table{'#'}!gx; + s! \\\+ !$g_escape_table{'+'}!gx; + s! \\\- !$g_escape_table{'-'}!gx; + s! \\\. !$g_escape_table{'.'}!gx; + s{ \\! }{$g_escape_table{'!'}}gx; + + return $_; +} + + +sub _DoAutoLinks { + my ($self, $text) = @_; + + $text =~ s{<((https?|ftp|dict):[^'">\s]+)>}{$1}gi; + + # Email addresses: + $text =~ s{ + < + (?:mailto:)? 
+ ( + [-.\w]+ + \@ + [-a-z0-9]+(\.[-a-z0-9]+)*\.[a-z]+ + ) + > + }{ + _EncodeEmailAddress( _UnescapeSpecialChars($1) ); + }egix; + + return $text; +} + + +sub _EncodeEmailAddress { +# +# Input: an email address, e.g. "foo@example.com" +# +# Output: the email address as a mailto link, with each character +# of the address encoded as either a decimal or hex entity, in +# the hopes of foiling most address harvesting spam bots. E.g.: +# +# foo +# @example.com +# +# Based on a filter by Matthew Wickline, posted to the BBEdit-Talk +# mailing list: +# + + my $addr = shift; + + srand; + my @encode = ( + sub { '&#' . ord(shift) . ';' }, + sub { '&#x' . sprintf( "%X", ord(shift) ) . ';' }, + sub { shift }, + ); + + $addr = "mailto:" . $addr; + + $addr =~ s{(.)}{ + my $char = $1; + if ( $char eq '@' ) { + # this *must* be encoded. I insist. + $char = $encode[int rand 1]->($char); + } elsif ( $char ne ':' ) { + # leave ':' alone (to spot mailto: later) + my $r = rand; + # roughly 10% raw, 45% hex, 45% dec + $char = ( + $r > .9 ? $encode[2]->($char) : + $r < .45 ? $encode[1]->($char) : + $encode[0]->($char) + ); + } + $char; + }gex; + + $addr = qq{$addr}; + $addr =~ s{">.+?:}{">}; # strip the mailto: from the visible part + + return $addr; +} + + +sub _UnescapeSpecialChars { +# +# Swap back in all the special characters we've hidden. +# + my $text = shift; + + while( my($char, $hash) = each(%g_escape_table) ) { + $text =~ s/$hash/$char/g; + } + return $text; +} + + +sub _TokenizeHTML { +# +# Parameter: String containing HTML markup. +# Returns: Reference to an array of the tokens comprising the input +# string. Each token is either a tag (possibly with nested, +# tags contained therein, such as , or a +# run of text between tags. Each element of the array is a +# two-element array; the first is either 'tag' or 'text'; +# the second is the actual value. +# +# +# Derived from the _tokenize() subroutine from Brad Choate's MTRegex plugin. 
+# +# + + my $str = shift; + my $pos = 0; + my $len = length $str; + my @tokens; + + my $depth = 6; + my $nested_tags = join('|', ('(?:<[a-z/!$](?:[^<>]') x $depth) . (')*>)' x $depth); + my $match = qr/(?s: ) | # comment + (?s: <\? .*? \?> ) | # processing instruction + $nested_tags/ix; # nested tags + + while ($str =~ m/($match)/g) { + my $whole_tag = $1; + my $sec_start = pos $str; + my $tag_start = $sec_start - length $whole_tag; + if ($pos < $tag_start) { + push @tokens, ['text', substr($str, $pos, $tag_start - $pos)]; + } + push @tokens, ['tag', $whole_tag]; + $pos = pos $str; + } + push @tokens, ['text', substr($str, $pos, $len - $pos)] if $pos < $len; + + return \@tokens; +} + + +sub _Outdent { +# +# Remove one level of line-leading tabs or spaces +# + my ($self, $text) = @_; + + $text =~ s/^(\t|[ ]{1,$self->{tab_width}})//gm; + return $text; +} + + +sub _Detab { +# +# Cribbed from a post by Bart Lateur: +# +# + my ($self, $text) = @_; + + $text =~ s{(.*?)\t}{$1.(' ' x ($self->{tab_width} - length($1) % $self->{tab_width}))}ge; + return $text; +} + +# +# MultiMarkdown Routines +# + +sub _ParseMetaData { + my ($self, $text) = @_; + my $clean_text = ""; + + my ($inMetaData, $currentKey, $lckey) = (1,'', ''); + + # If only metadata is "Format: complete" then skip + + if ($text =~ s/^(Format):\s*complete\n(.*?)\n/$2\n/is) { + # If "Format: complete" was added automatically, don't force first + # line of text to be metadata + $self->{_metadata}{$1}= "complete"; + $self->{document_format} = "complete"; + } + + foreach my $line ( split /\n/, $text ) { + $line =~ /^$/ and $inMetaData = 0; + if ($inMetaData) { + if ($line =~ /^([a-zA-Z0-9][0-9a-zA-Z _-]*?):\s*(.*)$/ ) { + $currentKey = $1; + my $meta = $2; + $currentKey =~ s/\s+/ /g; + $currentKey =~ s/\s$//; + $lckey = lc $currentKey; + $self->{_metadata}{$currentKey} = $meta; + if ($lckey eq "format") { + $self->{document_format} = lc($self->{_metadata}{$currentKey}); + } + if ($lckey eq "base url") { + 
$self->{base_url} = $self->{_metadata}{$currentKey}; + } + if ($lckey eq "bibliography title") { + $self->{bibliography_title} = $self->{_metadata}{$currentKey}; + $self->{bibliography_title} =~ s/\s*$//; + } + if ($lckey eq "base header level") { + $self->{base_header_level} = $self->{_metadata}{$currentKey}; + } + if (!$self->{_metadata_newline}{$lckey}) { + $self->{_metadata_newline}{$lckey} = $self->{_metadata_newline}{default}; + } + } else { + if ($currentKey eq "") { + # No metadata present + $clean_text .= "$line\n"; + $inMetaData = 0; + next; + } + if ($line =~ /^\s*(.+)$/ ) { + $self->{_metadata}{$currentKey} .= "$self->{_metadata_newline}{$lckey}$1"; + } + } + } else { + $clean_text .= "$line\n"; + } + } + + return $clean_text; +} + +sub _StripFootnoteDefinitions { + my ($self, $text) = @_; + my $less_than_tab = $self->{tab_width} - 1; + + while ($text =~ s{ + \n[ ]{0,$less_than_tab}\[\^([^\n]+?)\]\:[ \t]*# id = $1 + \n? + (.*?)\n{1,2} # end at new paragraph + ((?=\n[ ]{0,$less_than_tab}\S)|\Z) # Lookahead for non-space at line-start, or end of doc + } + {\n}sx) + { + my $id = $1; + my $footnote = "$2\n"; + $footnote =~ s/^[ ]{0,$self->{tab_width}}//gm; + + $self->{_footnotes}{_Id2Footnote($id)} = $footnote; + } + + return $text; +} + +sub _DoFootnoteMarks { + my ($self, $text) = @_; + + $text =~ s{ + \[\^(.+?)\] # id = $1 + }{ + my $result = ""; + my $id = _Id2Footnote($1); + if (defined $self->{_footnotes}{$id} ) { + my $fn = $self->{_used_footnotes}; + my ($counter, $uses); + if (defined $fn->{$id}) { + $counter = $fn->{$id}->{counter}; + $uses = $fn->{$id}->{uses}+1; + } else { + $counter = ++$self->{_footnote_counter}; + $uses = 1; + } + if ($self->{_footnotes}{$id} =~ /^(

    )?glossary:/i) { + $result = "$counter"; + } else { + $result = "$counter"; + } + $fn->{$id} = { counter => $counter, uses => $uses }; + } + $result; + }xsge; + + return $text; +} + +sub _DoFootnotes { + my ($self, $text) = @_; + + # First, run routines that get skipped in footnotes + foreach my $label (sort keys %{$self->{_footnotes}}) { + my $footnote = $self->_RunBlockGamut($self->{_footnotes}{$label}); + + $footnote = $self->_DoMarkdownCitations($footnote); + $footnote = $self->_DoFootnoteMarks($footnote); + $self->{_footnotes}{$label} = $footnote; + } + + return $self->_DoFootnoteMarks($text); +} + +sub _FixFootnoteParagraphs { + my ($self, $text) = @_; + + $text =~ s/^\\<\/footnote\>/<\/footnote>/gm; + + return $text; +} + +sub _PrintFootnotes { + my $self = shift; + my $footnote_counter = 0; + my @fnlist = (); + my $result = ""; + + while (my ($id, $hash) = each(%{$self->{_used_footnotes}})) { + $footnote_counter = $hash->{counter}; + my $uses = $hash->{uses}; + my $use = 0; + my $footnote = $self->{_footnotes}{$id}; + my $footnote_closing_tag = ""; + my $fntext = ""; + + $footnote =~ s/(\<\/(p(re)?|ol|ul)\>)$//; + $footnote_closing_tag = $1; + + if ($footnote =~ s/^(

    )?glossary:\s*//i) { + # Add some formatting for glossary entries + + $footnote =~ s{ + ^(.*?) # $1 = term + \s* + (?:\(([^\(\)]*)\)[^\n]*)? # $2 = optional sort key + \n + }{ + my $glossary = "$1"; + + if ($2) { + $glossary.="$2"; + }; + + $glossary . ":

    "; + }egsx; + + $fntext.="

  • $footnote"; + } else { + $fntext.="
  • $footnote"; + } + while ($use < $uses) { + $use++; + $fntext.=" ↩"; + $fntext.=" " if $use < $uses; # some whitespace between backreferences + } + $fntext.="$footnote_closing_tag
  • \n\n"; + $fnlist[$footnote_counter-1] = $fntext; + } + + if (@fnlist > 0) { + $result = "\n\n
    \n{empty_element_suffix}\n
      \n\n". + join('',@fnlist) . "
    \n
    "; + } else { + $result = ""; + } + + $result= _UnescapeSpecialChars($result); + return $result; +} + +sub _Header2Label { + my $header = shift; + my $label = lc $header; + $label =~ s/[^A-Za-z0-9:_.-]//g; # Strip illegal characters + while ($label =~ s/^[^A-Za-z]//g) + {}; # Strip illegal leading characters + return $label; +} + +sub _Id2Footnote { + # Since we prepend "fn:", we can allow leading digits in footnotes + my $id = shift; + my $footnote = lc $id; + $footnote =~ s/[^A-Za-z0-9:_.-]//g; # Strip illegal characters + return $footnote; +} + + +sub _xhtmlMetaData { + my $self = shift; + my $result = qq{\n}; + + # This screws up xsltproc - make sure to use `-nonet -novalid` if you + # have difficulty + if ($self->{allow_mathml}) { + $result .= qq{ +\n}; + + $result.= qq{\n\t\n}; + } else { + $result .= qq{\n}; + + $result.= qq!\n\t\n!; + } + + $result.= "\t\t\n"; + + foreach my $key (sort keys %{$self->{_metadata}}) { + # Strip trailing spaces + $self->{_metadata}{$key} =~ s/(\s)*$//s; + + # Strip spaces from key + my $export_key = $key; + $export_key =~ s/\s//g; + + if (lc($key) eq "title") { + $result.= "\t\t" . _EncodeAmpsAndAngles($self->{_metadata}{$key}) . 
"\n"; + } elsif (lc($key) eq "css") { + $result.= "\t\t{_metadata}{$key}\"$self->{empty_element_suffix}\n"; + } elsif (lc($export_key) eq "xhtmlheader") { + $result .= "\t\t$self->{_metadata}{$key}\n"; + } else { + my $encodedMeta = _EncodeAmpsAndAngles($self->{_metadata}{$key}); + $result.= qq!\t\t{empty_element_suffix}\n!; + } + } + $result.= "\t\n"; + + return $result; +} + +sub _textMetaData { + my $self = shift; + my $result = ""; + + return $result if $self->{strip_metadata}; + + foreach my $key (sort keys %{$self->{_metadata}}) { + $result .= "$key: $self->{_metadata}{$key}\n"; + } + $result =~ s/\s*\n/{empty_element_suffix}\n/g; + + if ($result ne "") { + $result.= "\n"; + } + + return $result; +} + +sub _ConvertCopyright{ + my $text = shift; + # Convert to an XML compatible form of copyright symbol + + $text =~ s/©/©/gi; + + return $text; +} + + +sub _DoTables { + my ($self, $text) = @_; + my $less_than_tab = $self->{tab_width} - 1; + + # Algorithm inspired by PHP Markdown Extra's table support + # + + # Reusable regexp's to match table + + my $line_start = qr{ + [ ]{0,$less_than_tab} + }mx; + + my $table_row = qr{ + [^\n]*?\|[^\n]*?\n + }mx; + + my $first_row = qr{ + $line_start + \S+.*?\|.*?\n + }mx; + + my $table_rows = qr{ + (\n?$table_row) + }mx; + + my $table_caption = qr{ + $line_start + \[.*?\][ \t]*\n + }mx; + + my $table_divider = qr{ + $line_start + [\|\-\+\:\.][ \-\+\|\:\.]* \| [ \-\+\|\:\.]* + }mx; + + my $whole_table = qr{ + ($table_caption)? # Optional caption + ($first_row # First line must start at beginning + ($table_row)*?)? # Header Rows + $table_divider # Divider/Alignment definitions + $table_rows+ # Body Rows + ($table_caption)? 
# Optional caption + }mx; + + + # Find whole tables, then break them up and process them + + $text =~ s{ + ^($whole_table) # Whole table in $1 + (\n|\Z) # End of file or 2 blank lines + }{ + my $table = $1; + + # Clean extra spaces at end of lines - + # they cause the processing to choke + $table =~ s/[\t ]*\n/\n/gs; + + my $result = "

    tags around block-level tags. + $text = $self->_HashHTMLBlocks($text); + $text = $self->_FormParagraphs($text); + + return $text; +} + + +sub _RunSpanGamut { +# +# These are all the transformations that occur *within* block-level +# tags like paragraphs, headers, and list items. +# + my ($self, $text) = @_; + + $text = _DoCodeSpans($text); + $text = $self->_DoMathSpans($text); + $text = _EscapeSpecialCharsWithinTagAttributes($text); + $text = _EncodeBackslashEscapes($text); + + # Process anchor and image tags. Images must come first, + # because ![foo][f] looks like an anchor. + $text = $self->_DoImages($text); + $text = $self->_DoAnchors($text); + + # Make links out of things like `` + # Must come after _DoAnchors(), because you can use < and > + # delimiters in inline links like [this](). + $text = $self->_DoAutoLinks($text); + $text = _EncodeAmpsAndAngles($text); + $text = _DoItalicsAndBold($text); + + # Do hard breaks: + $text =~ s/ {2,}\n/ {empty_element_suffix}\n/g; + + return $text; +} + + +sub _EscapeSpecialCharsWithinTagAttributes { +# +# Within tags -- meaning between < and > -- encode [\ ` * _] so they +# don't conflict with their use in Markdown for code, italics and strong. +# We're replacing each such character with its corresponding MD5 checksum +# value; this is likely overkill, but it should prevent us from colliding +# with the escape values by accident. +# + my $text = shift; + my $tokens ||= _TokenizeHTML($text); + $text = ''; # rebuild $text from the tokens + + foreach my $cur_token (@$tokens) { + if ($cur_token->[0] eq "tag") { + $cur_token->[1] =~ s! \\ !$g_escape_table{'\\'}!gx; + $cur_token->[1] =~ s{ (?<=.)(?=.) }{$g_escape_table{'`'}}gx; + $cur_token->[1] =~ s! \* !$g_escape_table{'*'}!gx; + $cur_token->[1] =~ s! _ !$g_escape_table{'_'}!gx; + } + $text .= $cur_token->[1]; + } + return $text; +} + + +sub _DoAnchors { +# +# Turn Markdown link shortcuts into XHTML tags. 
+# + my ($self, $text) = @_; + + # + # First, handle reference-style links: [link text] [id] + # + $text =~ s{ + ( # wrap whole match in $1 + \[ + ($g_nested_brackets) # link text = $2 + \] + + [ ]? # one optional space + (?:\n[ ]*)? # one optional newline followed by spaces + + \[ + (.*?) # id = $3 + \] + ) + }{ + my $result; + my $whole_match = $1; + my $link_text = $2; + my $link_id = lc $3; + + if ($link_id eq "") { + $link_id = lc $link_text; # for shortcut links like [this][]. + } + + # Allow automatic cross-references to headers + my $label = _Header2Label($link_id); + if (defined $self->{_urls}{$link_id}) { + my $url = $self->{_urls}{$link_id}; + $url =~ s! \* !$g_escape_table{'*'}!gx; # We've got to encode these to avoid + $url =~ s! _ !$g_escape_table{'_'}!gx; # conflicting with italics/bold. + $result = "{_titles}{$link_id} ) { + my $title = $self->{_titles}{$link_id}; + $title =~ s! \* !$g_escape_table{'*'}!gx; + $title =~ s! _ !$g_escape_table{'_'}!gx; + $result .= " title=\"$title\""; + } + $result .= $self->_DoAttributes($label); + $result .= ">$link_text"; + } elsif (defined $self->{_crossrefs}{$label}) { + my $url = $self->{_crossrefs}{$label}; + $url =~ s! \* !$g_escape_table{'*'}!gx; # We've got to encode these to avoid + $url =~ s! _ !$g_escape_table{'_'}!gx; # conflicting with italics/bold. + $result = "{_titles}{$label} ) { + my $title = $self->{_titles}{$label}; + $title =~ s! \* !$g_escape_table{'*'}!gx; + $title =~ s! _ !$g_escape_table{'_'}!gx; + $result .= " title=\"$title\""; + } + $result .= $self->_DoAttributes($label); + $result .= ">$link_text"; + } else { + $result = $whole_match; + } + $result; + }xsge; + + # + # Next, inline-style links: [link text](url "optional title") + # + $text =~ s{ + ( # wrap whole match in $1 + \[ + ($g_nested_brackets) # link text = $2 + \] + \( # literal paren + [ \t]* + ($g_nested_parens) # href = $3 + [ \t]* + ( # $4 + (['"]) # quote char = $5 + (.*?) 
# Title = $6 + \5 # matching quote + [ \t]* # ignore any spaces/tabs between closing quote and ) + )? # title is optional + \) + ) + }{ + my $result; + my $whole_match = $1; + my $link_text = $2; + my $url = $3; + my $title = $6; + + $url =~ s! \* !$g_escape_table{'*'}!gx; # We've got to encode these to avoid + $url =~ s! _ !$g_escape_table{'_'}!gx; # conflicting with italics/bold. + $url =~ s{^<(.*)>$}{$1}; # Remove <>'s surrounding URL, if present + $result = "{_urls}{$link_id}) { + my $url = $self->{_urls}{$link_id}; + $url =~ s! \* !$g_escape_table{'*'}!gx; # We've got to encode these to avoid + $url =~ s! _ !$g_escape_table{'_'}!gx; # conflicting with italics/bold. + $result = "{_titles}{$link_id} ) { + my $title = $self->{_titles}{$link_id}; + $title =~ s! \* !$g_escape_table{'*'}!gx; + $title =~ s! _ !$g_escape_table{'_'}!gx; + $result .= " title=\"$title\""; + } + $result .= $self->_DoAttributes($link_id); + $result .= ">$link_text"; + } elsif (defined $self->{_crossrefs}{$label}) { + my $url = $self->{_crossrefs}{$label}; + $url =~ s! \* !$g_escape_table{'*'}!gx; # We've got to encode these to avoid + $url =~ s! _ !$g_escape_table{'_'}!gx; # conflicting with italics/bold. + $result = "{_titles}{$label} ) { + my $title = $self->{_titles}{$label}; + $title =~ s! \* !$g_escape_table{'*'}!gx; + $title =~ s! _ !$g_escape_table{'_'}!gx; + $result .= " title=\"$title\""; + } + $result .= $self->_DoAttributes($label); + $result .= ">$link_text"; + } else { + $result = $whole_match; + } + $result; + }xsge; + + return $text; +} + + +sub _DoImages { +# +# Turn Markdown image shortcuts into tags. +# + my ($self, $text) = @_; + + # + # First, handle reference-style labeled images: ![alt text][id] + # + $text =~ s{ + ( # wrap whole match in $1 + !\[ + (.*?) # alt text = $2 + \] + + [ ]? # one optional space + (?:\n[ ]*)? # one optional newline followed by spaces + + \[ + (.*?) 
# id = $3 + \] + + ) + }{ + my $result; + my $whole_match = $1; + my $alt_text = $2; + my $link_id = lc $3; + + if ($link_id eq "") { + $link_id = lc $alt_text; # for shortcut links like ![this][]. + } + + $alt_text =~ s/"/"/g; + + if (defined $self->{_urls}{$link_id}) { + my $url = $self->{_urls}{$link_id}; + $url =~ s! \* !$g_escape_table{'*'}!gx; # We've got to encode these to avoid + $url =~ s! _ !$g_escape_table{'_'}!gx; # conflicting with italics/bold. + + my $idString = ""; + if ($self->{img_ids}) { + my $label = _Header2Label($alt_text); + $self->{_crossrefs}{$label} = "#$label"; + if (! defined $self->{_titles}{$link_id}) { + $self->{_titles}{$link_id} = $alt_text; + } + $idString = " id=\"$label\""; + } + + $result = "{_titles}{$link_id}) { + my $title = $self->{_titles}{$link_id}; + $title =~ s! \* !$g_escape_table{'*'}!gx; + $title =~ s! _ !$g_escape_table{'_'}!gx; + $result .= " title=\"$title\""; + } + $result .= $self->_DoAttributes($link_id); + $result .= $self->{empty_element_suffix}; + } + else { + # If there's no such link ID, leave intact: + $result = $whole_match; + } + + $result; + }xsge; + + # + # Next, handle inline images: ![alt text](url "optional title") + # Don't forget: encode * and _ + + $text =~ s{ + ( # wrap whole match in $1 + !\[ + (.*?) # alt text = $2 + \] + \s? # One optional whitespace character + \( # literal paren + [ \t]* + ($g_nested_parens) # href = $3 + [ \t]* + ( # $4 + (['"]) # quote char = $5 + (.*?) # title = $6 + \5 # matching quote + [ \t]* + )? # title is optional + \) + ) + }{ + my $result; + my $whole_match = $1; + my $alt_text = $2; + my $url = $3; + my $title = (defined $6) ? $6 : ''; + + $alt_text =~ s/"/"/g; + $title =~ s/"/"/g; + + $url =~ s! \* !$g_escape_table{'*'}!gx; # We've got to encode these to avoid + $url =~ s! _ !$g_escape_table{'_'}!gx; # conflicting with italics/bold. 
+ $url =~ s{^<(.*)>$}{$1}; # Remove <>'s surrounding URL, if present + + my $idString = ""; + if ($self->{img_ids}) { + my $label = _Header2Label($alt_text); + $self->{_crossrefs}{$label} = "#$label"; +# $self->{_titles}{$label} = $alt_text; # I think this line should not be here + $idString = " id=\"$label\""; + } + + $result = "{empty_element_suffix}; + + $result; + }xsge; + + return $text; +} + + +sub _DoHeaders { + my ($self, $text) = @_; + my $header = ""; + my $label = ""; + my $idString = ""; + + # Setext-style headers: + # Header 1 + # ======== + # + # Header 2 + # -------- + # + $text =~ s{ ^(.+?)(?:\s*(?_RunSpanGamut($1); + $header =~ s/^\s*//s; + + if ($self->{heading_ids} && $label ne "") { + $self->{_crossrefs}{$label} = "#$label"; + $self->{_titles}{$label} = _StripHTML($header); + $idString = " id=\"$label\""; + } else { + $idString = ""; + } + my $h_level = $self->{base_header_level}; + + "" . $header . "\n\n"; + }egmx; + + $text =~ s{ ^(.+?)(?:\s*(?_RunSpanGamut($1); + $header =~ s/^\s*//s; + + if ($self->{heading_ids} && $label ne "") { + $self->{_crossrefs}{$label} = "#$label"; + $self->{_titles}{$label} = _StripHTML($header); + $idString = " id=\"$label\""; + } else { + $idString = ""; + } + + my $h_level = $self->{base_header_level} +1; + + "" . $header . "\n\n"; + }egmx; + + + # atx-style headers: + # # Header 1 + # ## Header 2 + # ## Header 2 with closing hashes ## + # ... + # ###### Header 6 + # + $text =~ s{ + ^(\#{1,6}) # $1 = string of #'s + [ \t]* + (.+?) # $2 = Header text + [ \t]* + (?:(?{base_header_level} - 1; + if (defined $3) { + $label = _Header2Label($3); + } else { + $label = _Header2Label($2); + } + $header = $self->_RunSpanGamut($2); + $header =~ s/^\s*//s; + + if ($self->{heading_ids} && $label ne "") { + $self->{_crossrefs}{$label} = "#$label"; + $self->{_titles}{$label} = _StripHTML($header); + $idString = " id=\"$label\""; + } else { + $idString = ""; + } + + "" . $header . 
"\n\n"; + }egmx; + + return $text; +} + + +sub _DoLists { +# +# Form HTML ordered (numbered) and unordered (bulleted) lists. +# + my ($self, $text) = @_; + my $less_than_tab = $self->{tab_width} - 1; + + # Re-usable patterns to match list item bullets and number markers: + my $marker_ul = qr/[*+-]/; + my $marker_ol = qr/\d+[.]/; + my $marker_any = qr/(?:$marker_ul|$marker_ol)/; + + # Re-usable pattern to match any entirel ul or ol list: + my $whole_list = qr{ + ( # $1 = whole list + ( # $2 + [ ]{0,$less_than_tab} + (${marker_any}) # $3 = first list item marker + [ \t]+ + ) + (?s:.+?) + ( # $4 + \z + | + \n{2,} + (?=\S) + (?! # Negative lookahead for another list item marker + [ \t]* + ${marker_any}[ \t]+ + ) + ) + ) + }mx; + + # We use a different prefix before nested lists than top-level lists. + # See extended comment in _ProcessListItems(). + # + # Note: There's a bit of duplication here. My original implementation + # created a scalar regex pattern as the conditional result of the test on + # $self->{_list_level}, and then only ran the $text =~ s{...}{...}egmx + # substitution once, using the scalar as the pattern. This worked, + # everywhere except when running under MT on my hosting account at Pair + # Networks. There, this caused all rebuilds to be killed by the reaper (or + # perhaps they crashed, but that seems incredibly unlikely given that the + # same script on the same server ran fine *except* under MT. I've spent + # more time trying to figure out why this is happening than I'd like to + # admit. My only guess, backed up by the fact that this workaround works, + # is that Perl optimizes the substition when it can figure out that the + # pattern will never change, and when this optimization isn't on, we run + # afoul of the reaper. Thus, the slightly redundant code that uses two + # static s/// patterns rather than one conditional pattern. 
+ + if ($self->{_list_level}) { + $text =~ s{ + ^ + $whole_list + }{ + my $list = $1; + my $list_type = ($3 =~ m/$marker_ul/) ? "ul" : "ol"; + + # Turn double returns into triple returns, so that we can make a + # paragraph for the last item in a list, if necessary: + $list =~ s/\n{2,}/\n\n\n/g; + my $result = $self->_ProcessListItems($list, $marker_any); + + $result = "<$list_type>\n" . $result . "\n"; + $result; + }egmx; + } + else { + $text =~ s{ + $self->{_list_lead} + $whole_list + }{ + my $list = $1; + my $list_type = ($3 =~ m/$marker_ul/) ? "ul" : "ol"; + # Turn double returns into triple returns, so that we can make a + # paragraph for the last item in a list, if necessary: + $list =~ s/\n{2,}/\n\n\n/g; + my $result = $self->_ProcessListItems($list, $marker_any); + $result = "<$list_type>\n" . $result . "\n\n"; + $result; + }egmx; + } + + + return $text; +} + + +sub _ProcessListItems { +# +# Process the contents of a single ordered or unordered list, splitting it +# into individual list items. +# + + my ($self, $list_str, $marker_any) = @_; + + + # The $self->{_list_level} global keeps track of when we're inside a list. + # Each time we enter a list, we increment it; when we leave a list, + # we decrement. If it's zero, we're not in a list anymore. + # + # We do this because when we're not inside a list, we want to treat + # something like this: + # + # I recommend upgrading to version + # 8. Oops, now this line is treated + # as a sub-list. + # + # As a single paragraph, despite the fact that the second line starts + # with a digit-period-space sequence. + # + # Whereas when we're inside a list (or sub-list), that line will be + # treated as the start of a sub-list. What a kludge, huh? This is + # an aspect of Markdown's syntax that's hard to parse perfectly + # without resorting to mind-reading. Perhaps the solution is to + # change the syntax rules such that sub-lists must start with a + # starting cardinal number; e.g. "1." or "a.". 
+ + $self->{_list_level}++; + + # trim trailing blank lines: + $list_str =~ s/\n{2,}\z/\n/; + + + $list_str =~ s{ + (\n)? # leading line = $1 + (^[ \t]*) # leading whitespace = $2 + ($marker_any) [ \t]+ # list marker = $3 + ((?s:.+?) # list item text = $4 + (\n{1,2})) + (?= \n* (\z | \2 ($marker_any) [ \t]+)) + }{ + my $item = $4; + my $leading_line = $1; + my $leading_space = $2; + + if ($leading_line or ($item =~ m/\n{2,}/) + or ($item =~ $self->{_list_blockquote_pattern})) { + $item = $self->_RunBlockGamut($self->_Outdent($item)); + } + else { + # Recursion for sub-lists: + $item = $self->_DoLists($self->_Outdent($item)); + chomp $item; + $item = $self->_RunSpanGamut($item); + } + + "

    \n"; + my @alignments; + my $use_row_header = 1; + + # Add Caption, if present + + if ($table =~ s/^$line_start(?:\[\s*(.*)\s*\])?(?:\[\s*(.*?)\s*\])[ \t]*$//m) { + my $table_id = ""; + my $table_caption = ""; + + $table_id = _Header2Label($2); + + if (defined $1) { + $table_caption = $1; + } else { + $table_caption = $2; + } + $result .= "\n"; + + $self->{_crossrefs}{$table_id} = "#$table_id"; + $self->{_titles}{$table_id} = "see table"; # captions with "stuff" in them break links + } + + # If a second "caption" is present, treat it as a summary + # However, this is not valid in XHTML 1.0 Strict + # But maybe in future + + # A summary might be longer than one line + if ($table =~ s/\n$line_start\[\s*(.*?)\s*\][ \t]*\n/\n/s) { + # $result .= "" . $self->_RunSpanGamut($1) . "\n"; + } + + # Now, divide table into header, alignment, and body + + # First, add leading \n in case there is no header + + $table = "\n" . $table; + + # Need to be greedy + + $table =~ s/\n($table_divider)\n(($table_rows)+)//s; + + my $body = ""; + my $alignment_string = ""; + if (defined $1){ + $alignment_string = $1; + } + if (defined $2){ + $body = $2; + } + + # Process column alignment + while ($alignment_string =~ /\|?\s*(.+?)\s*(\||\Z)/gs) { + my $cell = $self->_RunSpanGamut($1); + if ($cell =~ /\+/){ + $result .= "{empty_element_suffix}\n"; + push(@alignments,"center"); + } else { + $result .= " align=\"right\"$self->{empty_element_suffix}\n"; + push(@alignments,"right"); + } + } else { + if ($cell =~ /^\:/) { + $result .= " align=\"left\"$self->{empty_element_suffix}\n"; + push(@alignments,"left"); + } else { + if (($cell =~ /^\./) || ($cell =~ /\.$/)) { + $result .= " align=\"char\"$self->{empty_element_suffix}\n"; + push(@alignments,"char"); + } else { + $result .= "$self->{empty_element_suffix}\n"; + push(@alignments,""); + } + } + } + } + + # Process headers + $table =~ s/^\n+//s; + + $result .= "\n"; + + # Strip blank lines + $table =~ s/\n[ \t]*\n/\n/g; + + foreach my $line 
(split(/\n/, $table)) { + # process each line (row) in table + $result .= "\n"; + my $count=0; + while ($line =~ /\|?\s*([^\|]+?)\s*(\|+|\Z)/gs) { + # process contents of each cell + my $cell = $self->_RunSpanGamut($1); + my $ending = $2; + my $colspan = ""; + if ($ending =~ s/^\s*(\|{2,})\s*$/$1/) { + $colspan = " colspan=\"" . length($ending) . "\""; + } + $result .= "\t$cell\n"; + if ( $count == 0) { + if ($cell =~ /^\s*$/) { + $use_row_header = 1; + } else { + $use_row_header = 0; + } + } + $count++; + } + $result .= "\n"; + } + + # Process body + + $result .= "\n\n"; + + foreach my $line (split(/\n/, $body)) { + # process each line (row) in table + if ($line =~ /^\s*$/) { + $result .= "\n\n\n"; + next; + } + $result .= "\n"; + my $count=0; + while ($line =~ /\|?\s*([^\|]+?)\s*(\|+|\Z)/gs) { + # process contents of each cell + my $cell = $self->_RunSpanGamut($1); + my $ending = ""; + if ($2 ne ""){ + $ending = $2; + } + my $colspan = ""; + my $cell_type = "td"; + if ($count == 0 && $use_row_header == 1) { + $cell_type = "th"; + } + if ($ending =~ s/^\s*(\|{2,})\s*$/$1/) { + $colspan = " colspan=\"" . length($ending) . "\""; + } + if ($alignments[$count] !~ /^\s*$/) { + $result .= "\t<$cell_type$colspan align=\"$alignments[$count]\">$cell\n"; + } else { + $result .= "\t<$cell_type$colspan>$cell\n"; + } + $count++; + } + $result .= "\n"; + } + + # Strip out empty sections + $result =~ s/\s*<\/thead>\s*//s; + + # Handle pull-quotes + + # This might be too specific for my needs. If others want it + # removed, I am open to discussion. + + $result =~ s/
    " . $self->_RunSpanGamut($table_caption). "
    \s*\s*/
    \n\n/s; + + $result .= "\n
    \n"; + $result + }egmx; + + my $table_body = qr{ + ( # wrap whole match in $2 + + (.*?\|.*?)\n # wrap headers in $3 + + [ ]{0,$less_than_tab} + ($table_divider) # alignment in $4 + + ( # wrap cells in $5 + $table_rows + ) + ) + }mx; + + return $text; +} + + +sub _DoAttributes{ + my ($self, $id) = @_; + my $result = ""; + + if (defined $self->{_attributes}{$id}) { + my $attributes = $self->{_attributes}{$id}; + while ($attributes =~ s/(\S+)="(.*?)"//) { + $result .= " $1=\"$2\""; + } + while ($attributes =~ /(\S+)=(\S+)/g) { + $result .= " $1=\"$2\""; + } + } + + return $result; +} + + +sub _StripMarkdownReferences { + my ($self, $text) = @_; + my $less_than_tab = $self->{tab_width} - 1; + + while ($text =~ s{ + \n\[\#(.+?)\]:[ \t]* # id = $1 + \n? + (.*?)\n{1,2} # end at new paragraph + ((?=\n[ ]{0,$less_than_tab}\S)|\Z) # Lookahead for non-space at line-start, or end of doc + } + {\n}sx) + { + my $id = $1; + my $reference = "$2\n"; + + $reference =~ s/^[ ]{0,$self->{tab_width}}//gm; + + $reference = $self->_RunBlockGamut($reference); + + # strip leading and trailing

    tags (they will be added later) + $reference =~ s/^\//s; + $reference =~ s/\<\/p\>\s*$//s; + + $self->{_references}{$id} = $reference; + } + + return $text; +} + +sub _DoMarkdownCitations { + my ($self, $text) = @_; + + $text =~ s{ # Allow for citations without locator to be written + \[\#([^\[]*?)\] # in usual manner, e.g. [#author][] rather than + [ ]? # [][#author] + (?:\n[ ]*)? + \[\s*\] + }{ + "[][#$1]"; + }xsge; + + $text =~ s{ + \[([^\[]*?)\] # citation text = $1 + [ ]? # one optional space + (?:\n[ ]*)? # one optional newline followed by spaces + \[\#(.*?)\] # id = $2 + }{ + my $result; + my $anchor_text = $1; + my $id = $2; + my $count; + + # implement equivalent to \citet + my $textual_string = ""; + if ($anchor_text =~ s/^(.*?);\s*//) { + $textual_string = "$1"; + } + + if (defined $self->{_references}{$id} ) { + my $citation_counter=0; + + # See if citation has been used before + foreach my $old_id (@{$self->{_used_references}}) { + $citation_counter++; + $count = $citation_counter if ($old_id eq $id); + } + + if (! defined $count) { + $self->{_citation_counter}++; + $count = $self->{_citation_counter}; + push (@{$self->{_used_references}},$id); + } + + $result = "$textual_string ($count"; + + if ($anchor_text ne "") { + $result .=", $anchor_text"; + } + + $result .= ")"; + } else { + # No reference exists + $result = "$textual_string ($id"; + + if ($anchor_text ne "") { + $result .=", $anchor_text"; + } + + $result .= ")"; + } + + if (_Header2Label($anchor_text) eq "notcited"){ + $result = ""; + } + $result; + }xsge; + + return $text; + +} + +sub _PrintMarkdownBibliography{ + my $self = shift; + my $citation_counter = 0; + my $result; + + foreach my $id (@{$self->{_used_references}}) { + $citation_counter++; + $result.="

    [$citation_counter] $self->{_references}{$id}

    \n\n"; + } + $result .= "
    "; + + if ($citation_counter > 0) { + $result = "\n\n
    \n{empty_element_suffix}\n

    $self->{bibliography_title}

    \n\n".$result; + } else { + $result = ""; + } + + return $result; +} + +sub _GenerateImageCrossRefs { + my ($self, $text) = @_; + + # + # First, handle reference-style labeled images: ![alt text][id] + # + $text =~ s{ + ( # wrap whole match in $1 + !\[ + (.*?) # alt text = $2 + \] + + [ ]? # one optional space + (?:\n[ ]*)? # one optional newline followed by spaces + + \[ + (.*?) # id = $3 + \] + + ) + }{ + my $result; + my $whole_match = $1; + my $alt_text = $2; + my $link_id = lc $3; + + if ($link_id eq "") { + $link_id = lc $alt_text; # for shortcut links like ![this][]. + } + + $alt_text =~ s/"/"/g; + if (defined $self->{_urls}{$link_id}) { + my $label = _Header2Label($alt_text); + $self->{_crossrefs}{$label} = "#$label"; + } + else { + # If there's no such link ID, leave intact: + $result = $whole_match; + } + + $whole_match; + }xsge; + + # + # Next, handle inline images: ![alt text](url "optional title") + # Don't forget: encode * and _ + + $text =~ s{ + ( # wrap whole match in $1 + !\[ + (.*?) # alt text = $2 + \] + \( # literal paren + [ \t]* + ? # src url = $3 + [ \t]* + ( # $4 + (['"]) # quote char = $5 ' + (.*?) # title = $6 + \5 # matching quote + [ \t]* + )? # title is optional + \) + ) + }{ + my $result; + my $whole_match = $1; + my $alt_text = $2; + + $alt_text =~ s/"/"/g; + my $label = _Header2Label($alt_text); + $self->{_crossrefs}{$label} = "#$label"; + $whole_match; + }xsge; + + return $text; +} + +sub _FindMathEquations{ + my ($self, $text) = @_; + + $text =~ s{ + (\]*)id=\"(.*?)\"> # " + }{ + my $label = _Header2Label($2); + my $header = $self->_RunSpanGamut($2); + + $self->{_crossrefs}{$label} = "#$label"; + $self->{_titles}{$label} = $header; + + $1 . "id=\"$label\">"; + }xsge; + + return $text; +} + +sub _DoMathSpans { + # Based on Gruber's _DoCodeSpans + + my ($self, $text) = @_; + my $display_as_block = 0; + $display_as_block = 1 if ($text =~ /^<<[^\>\>]*>>$/); + + # << .. 
>> delimiters + $text =~ s{ + (?>) + }{ + my $m = "$2"; + my $label = ""; + my @attr = (xmlns=>"http://www.w3.org/1998/Math/MathML"); + + if (defined $3) { + $label = _Header2Label($3); + my $header = $self->_RunSpanGamut($3); + + $self->{_crossrefs}{$label} = "#$label"; + $self->{_titles}{$label} = $header; + } + $m =~ s/^[ \t]*//g; # leading whitespace + $m =~ s/[ \t]*$//g; # trailing whitespace + push(@attr,(id=>"$label")) if ($label ne ""); + push(@attr,(display=>"block")) if ($display_as_block == 1); + + $m = $mathParser->TextToMathML($m,\@attr); + "$m"; + }egsx; + + $display_as_block = 1 if ($text =~ /^\$(?:\S|\S.+?\S)\$$/); + + # $..$ delimiters, there must be no whitespace after the first $ and no whitespace before the second $ + $text =~ s{ + (?_RunSpanGamut($3); + + $self->{_crossrefs}{$label} = "#$label"; + $self->{_titles}{$label} = $header; + } + push(@attr,(id=>"$label")) if ($label ne ""); + push(@attr,(display=>"block")) if ($display_as_block == 1); + + $m = $mathParser->TextToMathML($m,\@attr); + "$m"; + }egsx; + + return $text; +} + +sub _DoDefinitionLists { + # Uses the syntax proposed by Michel Fortin in PHP Markdown Extra + + my ($self, $text) = @_; + my $less_than_tab = $self->{tab_width} -1; + + return $text unless $text =~ /\n[ ]{0,$less_than_tab}\:[ \t]+/; + + my $line_start = qr{ + [ ]{0,$less_than_tab} + }mx; + + my $term = qr{ + $line_start + [^:\s][^\n]*\n + }sx; + + my $definition = qr{ + \n?[ ]{0,$less_than_tab} + \:[ \t]+(.*?)\n + ((?=\n?\:)|\n|\Z) # Lookahead for next definition, two returns, + # or the end of the document + }sx; + + my $definition_block = qr{ + ((?:$term)+) # $1 = one or more terms + ((?:$definition)+) # $2 = by one or more definitions + }sx; + + my $definition_list = qr{ + (?:$definition_block\n*)+ # One ore more definition blocks + }sx; + + $text =~ s{ + ($definition_list) # $1 = the whole list + }{ + my $list = $1; + my $result = $1; + + $list =~ s{ + (?:$definition_block)\n* + }{ + my $terms = $1; + my $defs = 
$2; + + $terms =~ s{ + [ ]{0,$less_than_tab} + (.*) + \s* + }{ + my $term = $1; + my $result = ""; + $term =~ s/^\s*(.*?)\s*$/$1/; + if ($term !~ /^\s*$/){ + $result = "
    " . $self->_RunSpanGamut($1) . "
    \n"; + } + $result; + }xmge; + + $defs =~ s{ + $definition + }{ + my $def = $1 . "\n"; + $def =~ s/^[ ]{0,$self->{tab_width}}//gm; + "
    \n" . $self->_RunBlockGamut($def) . "\n
    \n"; + }xsge; + + $terms . $defs . "\n"; + }xsge; + + "
    \n" . $list . "
    \n\n"; + }xsge; + + return $text +} + +sub _UnescapeComments{ + # Remove encoding inside comments + # Based on proposal by Toras Doran (author of Text::MultiMarkdown) + + my ($self, $text) = @_; + $text =~ s{ + (?<=) # End comments + }{ + my $t = $1; + $t =~ s/&/&/g; + $t =~ s/</ { tag=>"msqrt", output=>"sqrt", tex=>'', ttype=>"UNARY" }, +"√" => { tag=>"msqrt", output=>"sqrt", tex=>'', ttype=>"UNARY" }, "root" => { tag=>"mroot", output=>"root", tex=>'', ttype=>"BINARY" }, "frac" => { tag=>"mfrac", output=>"/", tex=>'', ttype=>"BINARY" }, "/" => { tag=>"mfrac", output=>"/", tex=>'', ttype=>"INFIX" }, @@ -607,7 +608,9 @@ my %AMSymbol = ( # some greek symbols "alpha" => { tag=>"mi", output=>"α", tex=>'', ttype=>"CONST" }, +"α" => { tag=>"mi", output=>"α", tex=>'', ttype=>"CONST" }, "beta" => { tag=>"mi", output=>"β", tex=>'', ttype=>"CONST" }, +"β" => { tag=>"mi", output=>"β", tex=>'', ttype=>"CONST" }, "chi" => { tag=>"mi", output=>"χ", tex=>'', ttype=>"CONST" }, "delta" => { tag=>"mi", output=>"δ", tex=>'', ttype=>"CONST" }, "Delta" => { tag=>"mo", output=>"Δ", tex=>'', ttype=>"CONST" }, @@ -625,7 +628,9 @@ my %AMSymbol = ( "omega" => { tag=>"mi", output=>"ω", tex=>'', ttype=>"CONST" }, "Omega" => { tag=>"mo", output=>"Ω", tex=>'', ttype=>"CONST" }, "phi" => { tag=>"mi", output=>"ϕ", tex=>'', ttype=>"CONST" }, +"ϕ" => { tag=>"mi", output=>"ϕ", tex=>'', ttype=>"CONST" }, "varphi" => { tag=>"mi", output=>"φ", tex=>'', ttype=>"CONST" }, +"φ" => { tag=>"mi", output=>"φ", tex=>'', ttype=>"CONST" }, "Phi" => { tag=>"mo", output=>"Φ", tex=>'', ttype=>"CONST" }, "pi" => { tag=>"mi", output=>"π", tex=>'', ttype=>"CONST" }, "Pi" => { tag=>"mo", output=>"Π", tex=>'', ttype=>"CONST" }, @@ -636,7 +641,9 @@ my %AMSymbol = ( "Sigma" => { tag=>"mo", output=>"Σ", tex=>'', ttype=>"CONST" }, "tau" => { tag=>"mi", output=>"τ", tex=>'', ttype=>"CONST" }, "theta" => { tag=>"mi", output=>"θ", tex=>'', ttype=>"CONST" }, +"θ" => { tag=>"mi", output=>"θ", tex=>'', ttype=>"CONST" }, 
"vartheta" => { tag=>"mi", output=>"ϑ", tex=>'', ttype=>"CONST" }, +"ϑ" => { tag=>"mi", output=>"ϑ", tex=>'', ttype=>"CONST" }, "Theta" => { tag=>"mo", output=>"Θ", tex=>'', ttype=>"CONST" }, "upsilon" => { tag=>"mi", output=>"υ", tex=>'', ttype=>"CONST" }, "xi" => { tag=>"mi", output=>"ξ", tex=>'', ttype=>"CONST" }, @@ -645,12 +652,16 @@ my %AMSymbol = ( # binary operation symbols "*" => { tag=>"mo", output=>"⋅", tex=>"cdot", ttype=>"CONST" }, +"⋅" => { tag=>"mo", output=>"⋅", tex=>"cdot", ttype=>"CONST" }, "**" => { tag=>"mo", output=>"⋆", tex=>"star", ttype=>"CONST" }, "//" => { tag=>"mo", output=>"/", tex=>'', ttype=>"CONST" }, "\\\\" => { tag=>"mo", output=>"\\", tex=>"backslash", ttype=>"CONST" }, "setminus" => { tag=>"mo", output=>"\\", tex=>'', ttype=>"CONST" }, +"∖" => { tag=>"mo", output=>"\\", tex=>'', ttype=>"CONST" }, "xx" => { tag=>"mo", output=>"×", tex=>"times", ttype=>"CONST" }, +"×" => { tag=>"mo", output=>"×", tex=>"times", ttype=>"CONST" }, "-:" => { tag=>"mo", output=>"÷", tex=>"div", ttype=>"CONST" }, +"÷" => { tag=>"mo", output=>"÷", tex=>"div", ttype=>"CONST" }, "@" => { tag=>"mo", output=>"∘", tex=>"circ", ttype=>"CONST" }, "o+" => { tag=>"mo", output=>"⊕", tex=>"oplus", ttype=>"CONST" }, "ox" => { tag=>"mo", output=>"⊗", tex=>"otimes", ttype=>"CONST" }, @@ -668,26 +679,41 @@ my %AMSymbol = ( # binary relation symbols "!=" => { tag=>"mo", output=>"≠", tex=>"ne", ttype=>"CONST" }, +"≠" => { tag=>"mo", output=>"≠", tex=>"ne", ttype=>"CONST" }, ":=" => { tag=>"mo", output=>":=", tex=>'', ttype=>"CONST" }, #"lt" => { tag=>"mo", output=>"<", tex=>'', ttype=>"CONST" }, "lt" => { tag=>"mo", output=>"<", tex=>'', ttype=>"CONST" }, "<=" => { tag=>"mo", output=>"≤", tex=>"le", ttype=>"CONST" }, +"≤" => { tag=>"mo", output=>"≤", tex=>"le", ttype=>"CONST" }, "lt=" => { tag=>"mo", output=>"≤", tex=>"leq", ttype=>"CONST", latex=>1 }, ">=" => { tag=>"mo", output=>"≥", tex=>"ge", ttype=>"CONST" }, "geq" => { tag=>"mo", output=>"≥", tex=>'', ttype=>"CONST", 
latex=>1 }, +"≥" => { tag=>"mo", output=>"≥", tex=>"ge", ttype=>"CONST" }, "-<" => { tag=>"mo", output=>"≺", tex=>"prec", ttype=>"CONST", latex=>1 }, +"≺" => { tag=>"mo", output=>"≺", tex=>"prec", ttype=>"CONST", latex=>1 }, "-lt" => { tag=>"mo", output=>"≺", tex=>'', ttype=>"CONST" }, ">-" => { tag=>"mo", output=>"≻", tex=>"succ", ttype=>"CONST" }, +"≻" => { tag=>"mo", output=>"≻", tex=>"succ", ttype=>"CONST" }, "in" => { tag=>"mo", output=>"∈", tex=>'', ttype=>"CONST" }, +"∈" => { tag=>"mo", output=>"∈", tex=>'', ttype=>"CONST" }, "!in" => { tag=>"mo", output=>"∉", tex=>"notin", ttype=>"CONST" }, +"∉" => { tag=>"mo", output=>"∉", tex=>"notin", ttype=>"CONST" }, "sub" => { tag=>"mo", output=>"⊂", tex=>"subset", ttype=>"CONST" }, +"⊂" => { tag=>"mo", output=>"⊂", tex=>"subset", ttype=>"CONST" }, "sup" => { tag=>"mo", output=>"⊃", tex=>"supset", ttype=>"CONST" }, +"⊃" => { tag=>"mo", output=>"⊃", tex=>"supset", ttype=>"CONST" }, "sube" => { tag=>"mo", output=>"⊆", tex=>"subseteq", ttype=>"CONST" }, +"⊆" => { tag=>"mo", output=>"⊆", tex=>"subseteq", ttype=>"CONST" }, "supe" => { tag=>"mo", output=>"⊇", tex=>"supseteq", ttype=>"CONST" }, +"⊇" => { tag=>"mo", output=>"⊇", tex=>"supseteq", ttype=>"CONST" }, "-=" => { tag=>"mo", output=>"≡", tex=>"equiv", ttype=>"CONST" }, +"≡" => { tag=>"mo", output=>"≡", tex=>"equiv", ttype=>"CONST" }, "~=" => { tag=>"mo", output=>"≅", tex=>"cong", ttype=>"CONST" }, +"≅" => { tag=>"mo", output=>"≅", tex=>"cong", ttype=>"CONST" }, "~~" => { tag=>"mo", output=>"≈", tex=>"approx", ttype=>"CONST" }, +"≈" => { tag=>"mo", output=>"≈", tex=>"approx", ttype=>"CONST" }, "prop" => { tag=>"mo", output=>"∝", tex=>"propto", ttype=>"CONST" }, +"∝" => { tag=>"mo", output=>"∝", tex=>"propto", ttype=>"CONST" }, # new for perl "<" => { tag=>"mo", output=>"<", tex=>'', ttype=>"CONST" }, @@ -701,10 +727,14 @@ my %AMSymbol = ( "or" => { tag=>"mtext", output=>"or", tex=>'', ttype=>"SPACE" }, "not" => { tag=>"mo", output=>"¬", tex=>"neg", ttype=>"CONST" }, 
"=>" => { tag=>"mo", output=>"⇒", tex=>"implies", ttype=>"CONST" }, +"⇒" => { tag=>"mo", output=>"⇒", tex=>"implies", ttype=>"CONST" }, "if" => { tag=>"mo", output=>"if", tex=>'if', ttype=>"SPACE" }, "<=>" => { tag=>"mo", output=>"⇔", tex=>"iff", ttype=>"CONST" }, +"⇔" => { tag=>"mo", output=>"⇔", tex=>"iff", ttype=>"CONST" }, "AA" => { tag=>"mo", output=>"∀", tex=>"forall", ttype=>"CONST" }, +"∀" => { tag=>"mo", output=>"∀", tex=>"forall", ttype=>"CONST" }, "EE" => { tag=>"mo", output=>"∃", tex=>"exists", ttype=>"CONST" }, +"∃" => { tag=>"mo", output=>"∃", tex=>"exists", ttype=>"CONST" }, "_|_" => { tag=>"mo", output=>"⊥", tex=>"bot", ttype=>"CONST" }, "TT" => { tag=>"mo", output=>"⊤", tex=>"top", ttype=>"CONST" }, "|--" => { tag=>"mo", output=>"⊢", tex=>"vdash", ttype=>"CONST" }, @@ -718,6 +748,14 @@ my %AMSymbol = ( "{" => { tag=>"mo", output=>"{", tex=>'', ttype=>"LEFTBRACKET" }, "}" => { tag=>"mo", output=>"}", tex=>'', ttype=>"RIGHTBRACKET" }, "|" => { tag=>"mo", output=>"|", tex=>'', ttype=>"LEFTRIGHT" }, +"|__" => { tag=>"mo", output=>"⌊", tex=>"lfloor", ttype=>"LEFTBRACKET" }, +"⌊" => { tag=>"mo", output=>"⌊", tex=>"lfloor", ttype=>"LEFTBRACKET" }, +"__|" => { tag=>"mo", output=>"⌋", tex=>"rfloor", ttype=>"RIGHTBRACKET" }, +"⌋" => { tag=>"mo", output=>"⌋", tex=>"rfloor", ttype=>"RIGHTBRACKET" }, +"|~" => { tag=>"mo", output=>"⌈", tex=>"lceil", ttype=>"LEFTBRACKET" }, +"⌈" => { tag=>"mo", output=>"⌈", tex=>"lceil", ttype=>"LEFTBRACKET" }, +"~|" => { tag=>"mo", output=>"⌉", tex=>"rceil", ttype=>"RIGHTBRACKET" }, +"⌉" => { tag=>"mo", output=>"⌉", tex=>"rceil", ttype=>"RIGHTBRACKET" }, # {input:"||", tag:"mo", output:"||", tex:null, ttype:LEFTRIGHT}, "(:" => { tag=>"mo", output=>"〈", tex=>"langle", ttype=>"LEFTBRACKET" }, ":)" => { tag=>"mo", output=>"〉", tex=>"rangle", ttype=>"RIGHTBRACKET" }, @@ -736,9 +774,11 @@ my %AMSymbol = ( "del" => { tag=>"mo", output=>"∂", tex=>"partial", ttype=>"CONST" }, "grad" => { tag=>"mo", output=>"∇", tex=>"nabla", 
ttype=>"CONST" }, "+-" => { tag=>"mo", output=>"±", tex=>"pm", ttype=>"CONST" }, +"±" => { tag=>"mo", output=>"±", tex=>"pm", ttype=>"CONST" }, "O/" => { tag=>"mo", output=>"∅", tex=>"emptyset", ttype=>"CONST" }, "oo" => { tag=>"mo", output=>"∞", tex=>"infty", ttype=>"CONST" }, "aleph" => { tag=>"mo", output=>"ℵ", tex=>'', ttype=>"CONST" }, +"ℵ" => { tag=>"mo", output=>"ℵ", tex=>'', ttype=>"CONST" }, "..." => { tag=>"mo", output=>"...", tex=>"ldots", ttype=>"CONST" }, ":." => { tag=>"mo", output=>"∴", tex=>"therefore", ttype=>"CONST" }, "/_" => { tag=>"mo", output=>"∠", tex=>"angle", ttype=>"CONST" }, @@ -751,17 +791,20 @@ my %AMSymbol = ( "ddots" => { tag=>"mo", output=>"⋱", tex=>'', ttype=>"CONST" }, "diamond" => { tag=>"mo", output=>"⋄", tex=>'', ttype=>"CONST" }, "square" => { tag=>"mo", output=>"□", tex=>'', ttype=>"CONST" }, -"|__" => { tag=>"mo", output=>"⌊", tex=>"lfloor", ttype=>"CONST" }, -"__|" => { tag=>"mo", output=>"⌋", tex=>"rfloor", ttype=>"CONST" }, -"|~" => { tag=>"mo", output=>"⌈", tex=>"lceil", ttype=>"CONST" }, -"~|" => { tag=>"mo", output=>"⌉", tex=>"rceil", ttype=>"CONST" }, "CC" => { tag=>"mo", output=>"ℂ", tex=>'', ttype=>"CONST" }, +"ℂ" => { tag=>"mo", output=>"ℂ", tex=>'', ttype=>"CONST" }, "NN" => { tag=>"mo", output=>"ℕ", tex=>'', ttype=>"CONST" }, +"ℕ" => { tag=>"mo", output=>"ℕ", tex=>'', ttype=>"CONST" }, "QQ" => { tag=>"mo", output=>"ℚ", tex=>'', ttype=>"CONST" }, +"ℚ" => { tag=>"mo", output=>"ℚ", tex=>'', ttype=>"CONST" }, "RR" => { tag=>"mo", output=>"ℝ", tex=>'', ttype=>"CONST" }, +"ℝ" => { tag=>"mo", output=>"ℝ", tex=>'', ttype=>"CONST" }, "ZZ" => { tag=>"mo", output=>"ℤ", tex=>'', ttype=>"CONST" }, +"ℤ" => { tag=>"mo", output=>"ℤ", tex=>'', ttype=>"CONST" }, "f" => { tag=>"mi", output=>"f", tex=>'', ttype=>"UNARY", func=>"true" }, "g" => { tag=>"mi", output=>"g", tex=>'', ttype=>"UNARY", func=>"true" }, +"€" => { tag=>"mi", output=>"€", tex=>'', ttype=>"CONST" }, +"¤" => { tag=>"mi", output=>"¤", tex=>'\textcurrency', 
ttype=>"CONST" }, # standard functions "lim" => { tag=>"mo", output=>"lim", tex=>'', ttype=>"UNDEROVER" }, @@ -787,11 +830,30 @@ my %AMSymbol = ( "min" => { tag=>"mo", output=>"min", tex=>'', ttype=>"UNDEROVER" }, "max" => { tag=>"mo", output=>"max", tex=>'', ttype=>"UNDEROVER" }, +# inverse trig functions are in the .js now, let's have them here too +"arcsin" => { tag=>"mo", output=>"arcsin", tex=>'', ttype=>"UNARY", func=>"true" }, +"arccos" => { tag=>"mo", output=>"arccos", tex=>'', ttype=>"UNARY", func=>"true" }, +"arctan" => { tag=>"mo", output=>"arctan", tex=>'', ttype=>"UNARY", func=>"true" }, +"arccot" => { tag=>"mo", output=>"arccot", tex=>'', ttype=>"UNARY", func=>"true" }, +"arcsec" => { tag=>"mo", output=>"arcsec", tex=>'', ttype=>"UNARY", func=>"true" }, +"arccsc" => { tag=>"mo", output=>"arccsc", tex=>'', ttype=>"UNARY", func=>"true" }, +"arcsinh" => { tag=>"mo", output=>"arcsinh", tex=>'', ttype=>"UNARY", func=>"true" }, +"arccosh" => { tag=>"mo", output=>"arccosh", tex=>'', ttype=>"UNARY", func=>"true" }, +"arctanh" => { tag=>"mo", output=>"arctanh", tex=>'', ttype=>"UNARY", func=>"true" }, +"arccoth" => { tag=>"mo", output=>"arccoth", tex=>'', ttype=>"UNARY", func=>"true" }, + +# also, we're quirky like this: +"versin" => { tag=>"mo", output=>"versin", tex=>'', ttype=>"UNARY", func=>"true" }, +"coversin" => { tag=>"mo", output=>"coversin", tex=>'', ttype=>"UNARY", func=>"true" }, +"exsec" => { tag=>"mo", output=>"exsec", tex=>'', ttype=>"UNARY", func=>"true" }, +"excsc" => { tag=>"mo", output=>"excsc", tex=>'', ttype=>"UNARY", func=>"true" }, + # arrows "uarr" => { tag=>"mo", output=>"↑", tex=>"uparrow", ttype=>"CONST" }, "darr" => { tag=>"mo", output=>"↓", tex=>"downarrow", ttype=>"CONST" }, "rarr" => { tag=>"mo", output=>"→", tex=>"rightarrow", ttype=>"CONST" }, "->" => { tag=>"mo", output=>"→", tex=>"to", ttype=>"CONST", latex=>1 }, +"→" => { tag=>"mo", output=>"→", tex=>"to", ttype=>"CONST", latex=>1 }, "|->" => { tag=>"mo", output=>"↦", 
tex=>"mapsto", ttype=>"CONST" }, "larr" => { tag=>"mo", output=>"←", tex=>"leftarrow", ttype=>"CONST" }, "harr" => { tag=>"mo", output=>"↔", tex=>"leftrightarrow", ttype=>"CONST" }, @@ -863,6 +925,7 @@ sub _parseExpr : method { my $newFrag = $self->_createDocumentFragment(); my ($node, $input, $symbol); do { + $str = _replaceUniSuperSubScripts($str); $str = _removeCharsAndBlanks($str, 0); ($node, $str) = $self->_parseIexpr($str); ($input, $symbol) = $self->_getSymbol($str); @@ -1298,6 +1361,69 @@ sub _removeCharsAndBlanks { return $st; } +BEGIN { +my %UnicodeSuperscripts = ( +"⁰" => "0", +"¹" => "1", +"²" => "2", +"³" => "3", +"⁴" => "4", +"⁵" => "5", +"⁶" => "6", +"⁷" => "7", +"⁸" => "8", +"⁹" => "9", +"⁺" => "+", +"⁻" => "-", +"⁼" => "=", +"⁽" => "(", +"⁾" => ")", +"ⁿ" => "n", +); + +my %UnicodeSubscripts = ( +"₀" => "0", +"₁" => "1", +"₂" => "2", +"₃" => "3", +"₄" => "4", +"₅" => "5", +"₆" => "6", +"₇" => "7", +"₈" => "8", +"₉" => "9", +"₊" => "+", +"₋" => "-", +"₌" => "=", +"₍" => "(", +"₎" => ")", +); + +my $UnicodeSupRE = join '|', keys %UnicodeSuperscripts; +my $UnicodeSubRE = join '|', keys %UnicodeSubscripts; + +# Replaces Unicode superscripts and subscripts with corresponding +# ASCIIMathML syntax +# Arguments: string +# Returns: resultant string +sub _replaceUniSuperSubScripts { + my ($s) = @_; + $s =~ s@(($UnicodeSupRE)+)@ + my $repl = $1; + $repl =~ s/($UnicodeSupRE)/$UnicodeSuperscripts{$1}/eg; + (length $repl) > 1 ? "^($repl)" : "^$repl"; + @egs; + + $s =~ s@(($UnicodeSubRE)+)@ + my $repl = $1; + $repl =~ s/($UnicodeSubRE)/$UnicodeSubscripts{$1}/eg; + (length $repl) > 1 ? "_($repl)" : "_$repl"; + @egs; + + return $s; +} +} + # Removes outermost parenthesis # Arguments: string # Returns: string with parentheses removed diff --git a/lib/Text/MultiMarkdown.pm b/lib/Text/MultiMarkdown.pm new file mode 120000 index 0000000..5dd02ea --- /dev/null +++ b/lib/Text/MultiMarkdown.pm @@ -0,0 +1 @@ +../MultiMarkdown.pm \ No newline at end of file