mkdocs-semiliterate/mkdocs_semiliterate/plugin.py

""" md
## Usage

Once this plugin is [installed](../drone_install.md), in your `mkdocs.yml`
file just replace the plugin name `simple` with `semiliterate`. It accepts all
of the same parameters, so `mkdocs` will still work as before, and
you will have immediate access to all of the following extensions. (Note that
this documentation assumes a familiarity with the
[usage](https://athackst.github.io/mkdocs-simple-plugin/mkdocs_simple_plugin/plugin/)
of the `simple` plugin.)
"""

from mkdocs import utils
from mkdocs.config import config_options
from mkdocs_simple_plugin.plugin import SimplePlugin, StreamExtract, LazyFile

import re
import yaml


class StreamInclusion(StreamExtract):
    """ md  An extension of the StreamExtract class which adds

### Inclusion syntax

While extracting content from a file (because it matches one of the
`semiliterate` patterns, rather than just one of the `include_extensions`),
an unescaped expression of the form

`{! FILENAME YAML !}`

(which may span multiple lines) will trigger file inclusion. The FILENAME may
be a bare word, in which case it cannot contain whitespace, or it may be
enclosed in single or double quotes. Note that FILENAME is interpreted relative
to the directory in which the file containing the `{! .. !}` expression
resides. The YAML is interpreted exactly as the extraction options to a
`semiliterate` item as
[documented](https://athackst.github.io/mkdocs-simple-plugin/mkdocs_simple_plugin/plugin/index.html#plugin_usage)
for the `simple` extension. The text extracted from FILENAME
is interpolated at the current location in the file currently being written.
Recursive inclusion is supported.

Note that the `{! ... !}` directive must be in lines that semiliterate would
normally copy. That is, it does not examine lines before the `start` regexp
is encountered, or after the `terminate` regexp, or between instances of
`stop` and `start`. It also doesn't check any text written from lines that
match these special expressions. Moreover, on such normally-transcribed lines,
it's the text **after** the application of any semiliterate `replace`ments that
is checked for `{! ... !}`.
     """

    def __init__(self, input_stream, output_stream, **kwargs):
        """ md
### Adjusted semiliterate options

The `start` regular-expression parameter to a `semiliterate` file-inclusion
pattern is now optional. If omitted, it means that extraction begins immediately
with the first line of a file; in this case, `pause` and `terminate` retain
their usual meanings, although there is not currently any way to resume from a
`pause` when `start` is not specified. This adjustment to `semiliterate`
parameters makes it easier to extract "front-matter" style documentation from
files. It also means that a plain `{! file.md !}` directive will simply
incorporate the full contents of `file.md`.
        """
        # With no `start` option, pass the base class a placeholder pattern
        # and remember that extraction should begin "hot", i.e. from the
        # first line of the input (see `extract`).
        self.start_hot = False
        if 'start' not in kwargs:
            kwargs['start'] = 'dummy'
            self.start_hot = True
        super().__init__(input_stream, output_stream, **kwargs)

    def extract(self):
        """Run the base-class extraction, beginning immediately with the
           first line when no `start` option was supplied.
        """
        if self.start_hot:
            # Mark extraction as already under way and clear the placeholder
            # `start` installed by __init__ before delegating.
            self.extracting = True
            self.start = False
        super().extract()

    # Opening of a directive: `{!` not preceded by a backtick or backslash.
    # Group 1 is `{!` plus leading whitespace; group 2 is the single
    # character after it (whitespace or an opening quote), which selects the
    # body pattern used to parse the rest of the directive.
    include_open = re.compile(r'''(?<![`\\])(\{\!\s*)([\s'"])''')
    # Body with a quoted file name: quote, name, same quote, whitespace,
    # YAML options (may span lines), closing `!}`.
    include_quoted_file = re.compile(
        r'''(['"])(?P<fn>.*?)\1\s+(?P<yml>[\s\S]*?)\s?\!\}''')
    # Body with a bare file name: whitespace, name (ends at the next
    # whitespace), YAML options (may span lines), closing `!}`.
    include_bare_file = re.compile(r'\s(?P<fn>.*?)\s+(?P<yml>[\s\S]*?)\s?\!\}')

    def extract_line(self, line):
        """Copy line to the output stream, applying all specified replacements
           and handling inclusion syntax.

           If the (replaced) line contains no inclusion directive it is
           transcribed as-is. Otherwise the text before the directive is
           transcribed, the named file is extracted into the same output
           stream by a nested StreamInclusion, and the text following the
           closing `!}` is transcribed (without being scanned for further
           directives). A directive that is not closed on its own line
           consumes further lines from the input stream.

           Raises EOFError if the input ends before `!}` is found, and
           TypeError if the directive's options are not a YAML mapping.
        """
        line = self.replace_line(line)
        include_match = StreamInclusion.include_open.search(line)
        if not include_match:
            self.transcribe(line)
            return
        # OK, we have found (the start of) an inclusion and must process it
        preamble = line[:include_match.start()]
        # Resume at the end of group 1 so that the character matched by
        # group 2 (whitespace or opening quote) is still available to the
        # body pattern, which expects to start on it.
        remainder = line[include_match.end(1):]
        body_pattern = StreamInclusion.include_quoted_file
        if include_match[2].isspace():
            body_pattern = StreamInclusion.include_bare_file
        body_match = body_pattern.search(remainder)
        if not body_match:
            # The directive continues on later lines: keep pulling lines
            # (with replacements applied) until the closing `!}` shows up.
            for extra_line in self.input_stream:
                remainder += self.replace_line(extra_line)
                body_match = body_pattern.search(remainder)
                if body_match:
                    break
        if not body_match:
            errmsg = "semiliterate: End of file while scanning for `!}`"
            utils.log.error(errmsg)
            raise EOFError(errmsg)
        include_path = self.include_root + '/' + body_match['fn']
        # Directory part of the included file's path; nested inclusions are
        # resolved relative to it.
        new_root = re.match(r'(.*)/', include_path)[1]
        include_parameters = yaml.safe_load(body_match['yml'])
        if not include_parameters:
            include_parameters = {}
        if not isinstance(include_parameters, dict):
            # E.g. a bare file name containing a space leaves its tail to be
            # parsed as a YAML scalar. Report that clearly, before anything
            # is written, rather than failing on the `**` expansion below.
            errmsg = (
                "semiliterate: options for inclusion of "
                f"`{body_match['fn']}` must be a YAML mapping, not "
                f"{type(include_parameters).__name__}")
            utils.log.error(errmsg)
            raise TypeError(errmsg)
        with open(include_path) as include_file:
            self.transcribe(preamble)
            inclusion = StreamInclusion(
                include_file, self.output_stream, include_root=new_root,
                **include_parameters)
            if inclusion.productive():
                self.wrote_something = True
        self.transcribe(remainder[body_match.end():])


class SemiliteratePlugin(SimplePlugin):
    r""" md   An extension of the mkdocs-simple-plugin
### Additional plugin parameters

`semiliterate` adds a couple of new plugin parameters to further tailor its
behavior as compared to `simple`. They are described in this section, with
default values in parentheses at the beginning of each entry.

{! plugin.py ---
  start: '[*]SimplePlugin.config_scheme'
  terminate: '^\s*\)'
  replace:
  - ["\\('(.*)',\\s*$", '\1\n']
  - ['config_options.Type.*?default=([^\)]*)', ':  (\1)']
  - '^\s*#(.*\s*)$'
!}
    """

    # The `#` comments inside the tuple below are the source of the generated
    # parameter documentation: the include directive in the class docstring
    # re-reads this file from the line that unpacks the base-class scheme up
    # to the line holding only the closing parenthesis, rewriting each line
    # with its `replace` rules. Keep each parameter name alone on its line,
    # followed by its `config_options.Type(...)` line and then its comments.
    config_scheme = (
        # Note documentation of each new parameter **follows** the parameter.
        *SimplePlugin.config_scheme,
        ('copy_standard_markdown',
         config_options.Type(bool, default=False)),
        # Whether to add MkDocs' list of standard Markdown extensions to the
        #    `include_extensions` parameter so that Markdown files will be
        #    directly copied to the docsite. Note that the `simple` behavior
        #    corresponds to a _true_ value for `copy_standard_markdown`, but
        #    `semiliterate` still incorporates all standard Markdown files
        #    because of the following `extract_standard_markdown` parameter.
        ('extract_standard_markdown',
         config_options.Type(dict, default={})),
        # If the `enable` key of this dict parameter is true
        #    (which it defaults to),
        #    it adds a semiliterate block causing extraction (and hence
        #    include-directive processing) from all standard Markdown files
        #    (as defined by MkDocs). The remaining keys of this parameter are
        #    included as parameters of that semiliterate block. Thus, the
        #    default value of the parameter arranges for Markdown files to be
        #    copied "as-is", except possibly for embedded inclusions.
        #    On the other hand, setting it to `{enable: false}` will prevent
        #    automatic extraction from standard Markdown files.
        ('report_docs_build',
         config_options.Type(bool, default=False))
        # If true, the name of the temporary directory to which generated docs
        #    files are copied/extracted will be written to standard output
        #    (even if the `-v` verbose option to mkdocs is not specified).
    )

    def build_docs(self):
        """Apply this plugin's extra parameters, then delegate to the
           base-class build and return its result.
        """
        if self.config['report_docs_build']:
            utils.log.info(
                f"semiliterate: generating docs in {self.build_docs_dir}")
        if not self.config['copy_standard_markdown']:
            # Copy directly only the extensions the user configured.
            # NOTE(review): presumably the base class would otherwise have
            # added the standard Markdown extensions -- confirm in `simple`.
            self.include_extensions = self.config['include_extensions']
        if self.config['extract_standard_markdown'].get('enable', True):
            # Add a semiliterate item matching any file name that ends in a
            # standard Markdown extension; destination `\1` expands to the
            # whole matched name. Every key of the parameter (including
            # `enable`) is forwarded into the item.
            ext_pat = '|'.join(re.escape(s) for s in utils.markdown_extensions)
            self.semiliterate.append(dict(
                pattern=re.compile(f"^(.*(?:{ext_pat}))$"),
                destination=r'\1',
                **self.config['extract_standard_markdown']))
        return super().build_docs()

    # FIXME: This method is copied from simple, just to insert a control
    # over what class is used to do the extraction. Try to get this inserted as
    # the method of the same name in simple.
    def extract_from(self, from_directory, name, destination_directory):
        """Extract content from the file in _from_directory_ named _name_
           to a file or files in _destination_directory_, as specified by
           the semiliterate parameters.

           Every semiliterate item whose `pattern` matches _name_ is tried.
           Returns a list of `(original_path, new_path)` pairs, one for each
           extraction that produced output. Raises LookupError when a
           matching item yields no output name, i.e. its pattern has no
           group and the item has no (non-empty) `destination`.
        """
        new_paths = []
        original = f"{from_directory}/{name}"
        for item in self.semiliterate:
            name_match = item['pattern'].search(name)
            if not name_match:
                continue
            new_name = ''
            if name_match.lastindex:
                # Default output name: the input name with the text of the
                # last matched group replaced by `.md`.
                last = name_match.lastindex
                new_name = (name[:name_match.start(last)]
                            + '.md'
                            + name[name_match.end(last):])
            if 'destination' in item:
                # An explicit destination template overrides the default.
                new_name = name_match.expand(item['destination'])
            if not new_name:
                raise LookupError(
                    "mkdocs-simple-plugin: No last group in match of "
                    f"{item['pattern']} to {name} and no destination")
            new_file = LazyFile(destination_directory, new_name)
            with open(original) as original_file:
                utils.log.debug(
                    f"mkdocs-simple-plugin: Scanning {original}...")
                try:
                    productive = self.try_extraction(
                        original_file, from_directory, new_file, **item)
                finally:
                    # Close the output even when extraction fails part-way.
                    new_file.close()
            if productive:
                new_path = f"{destination_directory}/{new_name}"
                utils.log.debug(f"        ... extracted {new_path}")
                new_paths.append((original, new_path))
        return new_paths

    def try_extraction(self, original_file, root, new_file, **kwargs):
        """Extract from _original_file_ into _new_file_ with a
           StreamInclusion, resolving inclusion directives relative to
           _root_, and return the result of its `productive()` call.
        """
        return StreamInclusion(
            original_file, new_file, include_root=root, **kwargs).productive()