#!/usr/bin/env python3

"""
Moin wiki format converter.

Copyright (C) 2018-2019, 2021, 2023, 2026 Paul Boddie <paul@boddie.org.uk>

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; either version 3 of the License, or (at your option) any later
version.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
details.

You should have received a copy of the GNU General Public License along with
this program.  If not, see <http://www.gnu.org/licenses/>.
"""

from moinformat import copy_attachments, errors, make_parser, make_serialiser, \
                       Metadata, parse, serialise
from moinformat.io import printstr, stderr
from os.path import split
import sys

try:
    from itertools import izip_longest
except ImportError:
    from itertools import zip_longest as izip_longest



# Long messages.

# Shown when --all is combined with explicit filenames for an input directory.

message_all_with_filenames = (
    "Using --all overrides any indicated pagenames. Either --all or the filenames\n"
    "should be omitted.\n"
    )

# Shown when --pagename is combined with --input-dir.

message_explicit_pagenames = (
    "Explicit pagenames (indicated using --pagename) are only to be specified when\n"
    "providing filenames without an input directory (indicated using --input-dir).\n"
    "\n"
    "To indicate pagenames within an input directory, omit any --pagename flags.\n"
    )

# Shown when --tree is combined with --format or --output-format.

message_tree_format_usage = (
    "The --tree option cannot be used together with the --format or --output-format\n"
    'options since the --tree option indicates use of the "pretty" format.\n'
    )



# Options management.

def getmapping(mappings):

    """
    Build a dictionary from 'mappings', a flat collection in which each key is
    immediately followed by its value.

    A later pair replaces an earlier pair having the same key. A final key
    lacking a value is discarded, and a None entry found where a key is
    expected is skipped.
    """

    pairs = {}
    remaining = iter(mappings)
    missing = object()

    for name in remaining:

        # None cannot act as a key: try the next entry instead.

        if name is None:
            continue

        # Consume the entry following the key as its value.

        value = next(remaining, missing)

        if value is missing:
            break

        pairs[name] = value

    return pairs

def getvalue(values, default=None):

    """
    Return the first item of 'values' when that item tests as true. Otherwise,
    meaning that 'values' is empty (or itself tests as false) or that its first
    item tests as false, return 'default'.
    """

    # Without any values, only the default can be provided.

    if not values:
        return default

    # A false first value also yields the default.

    first = values[0]

    if not first:
        return default

    return first



# Main program.

def main():

    """
    Interpret program options and perform the conversion.

    Options are taken from sys.argv. Each indicated file or page is read,
    parsed and serialised in turn, with the result either shown using printstr
    or written to the output context obtained from the metadata.

    The program exits with status 0 after showing the help text and with
    status 1 upon encountering conflicting options.
    """

    progname = split(sys.argv[0])[-1]
    args = sys.argv[1:]

    if "--help" in args:
        show_help(progname)
        sys.exit(0)

    # Option values.

    attachments_dir = []
    document_indexes = []
    filenames = []
    input_formats = []
    input_dir_types = []
    input_dirs = []
    input_encodings = []
    input_page_seps = []
    interwiki = []
    output_dirs = []
    output_encodings = []
    output_formats = []
    output_page_seps = []
    pagenames = []
    root_pagenames = []
    theme_names = []
    variables = []

    # Flags with no following arguments, mapping each option to a flag name.

    flag_options = {
        "--all"                 : "all",
        "--bundle"              : "bundle",
        "--common"              : "common",
        "--common-attachments"  : "common",
        "--fragment"            : "fragment",
        "--macros"              : "macros",
        "--no-inline"           : "no_inline",
        "--tree"                : "tree",
        "--wikiwords"           : "wikiwords",
        }

    flags = dict.fromkeys(flag_options.values(), False)

    # Options with following arguments, mapping each option to the list
    # collecting its values.

    value_options = {
        "--attachments-dir"     : attachments_dir,
        "--document-index"      : document_indexes,
        "--format"              : output_formats,
        "--input-dir"           : input_dirs,
        "--input-dir-type"      : input_dir_types,
        "--input-encoding"      : input_encodings,
        "--input-format"        : input_formats,
        "--input-page-sep"      : input_page_seps,
        "--interwiki"           : interwiki,
        "--mapping"             : interwiki,
        "--output-dir"          : output_dirs,
        "--output-encoding"     : output_encodings,
        "--output-format"       : output_formats,
        "--output-page-sep"     : output_page_seps,
        "--pagename"            : pagenames,
        "--root"                : root_pagenames,
        "--theme"               : theme_names,
        "--variables"           : variables,
        }

    # Obtain filenames by default.

    collecting = filenames

    for arg in args:

        # Flags with no following arguments.

        if arg in flag_options:

            # Tree output implies the "pretty" format and therefore conflicts
            # with any explicitly indicated output format.

            if arg == "--tree" and output_formats:
                stderr.write(message_tree_format_usage)
                sys.exit(1)

            flags[flag_options[arg]] = True

        # Options with following arguments: switch to collecting values for
        # the option, skipping the reset performed below.

        elif arg in value_options:
            values = value_options[arg]

            if values is output_formats and flags["tree"]:
                stderr.write(message_tree_format_usage)
                sys.exit(1)

            collecting = values
            continue

        # Collect options and arguments. The -f option is not itself collected
        # and merely stops the collection of values for a preceding option.

        elif arg != "-f":
            collecting.append(arg)

            # Collect multiple mappings: these options accept any number of
            # following arguments.

            if collecting is interwiki or collecting is variables:
                continue

        # Collect filenames normally.

        collecting = filenames

    fragment = flags["fragment"]
    tree = flags["tree"]

    input_format = getvalue(input_formats, "moin")
    output_format = "pretty" if tree else getvalue(output_formats, "html")
    input_dir = getvalue(input_dirs)
    output_dir = getvalue(output_dirs)

    # Define metadata.

    metadata = Metadata({
        "attachments"       : getvalue(attachments_dir, "attachments"),
        "bundle"            : flags["bundle"],
        "common_attachments": flags["common"],
        "document_index"    : getvalue(document_indexes),
        "fragment"          : fragment,
        "input_context"     : getvalue(input_dir_types, "directory")
                              if input_dir else "standalone",
        "input_encoding"    : getvalue(input_encodings),
        "input_filename"    : input_dir,
        "input_format"      : input_format,
        "input_separator"   : getvalue(input_page_seps),
        "interwiki"         : getmapping(interwiki),
        "link_format"       : output_format,
        "no_inline"         : flags["no_inline"],
        "output_context"    : "directory" if output_dir else "standalone",
        "output_encoding"   : getvalue(output_encodings),
        "output_format"     : output_format,
        "output_filename"   : output_dir,
        "output_separator"  : getvalue(output_page_seps),
        "root_pagename"     : getvalue(root_pagenames, "FrontPage"),

        # No theme is named when generating fragments.

        "theme_name"        : None if fragment else
                              "%s.%s" % (getvalue(theme_names, "default"), output_format),
        "variables"         : getmapping(variables),
        "wikiwords"         : flags["wikiwords"],
        })

    # Define the input context and theme.

    source = metadata.get_input()
    theme = metadata.get_theme()

    # Treat filenames as pagenames if an input directory is indicated and if no
    # pagenames are explicitly specified.

    if input_dir:
        if pagenames:
            stderr.write(message_explicit_pagenames)
            sys.exit(1)

        if flags["all"]:
            if filenames:
                stderr.write(message_all_with_filenames)
                sys.exit(1)

            filenames = source.all()

        pagenames = filenames
        filenames = []

    # Open each file or page, parse the content, serialise the document.
    # Pagenames and filenames are paired by position, with None standing in
    # for whichever is missing.

    not_written = []

    for pagename, filename in izip_longest(pagenames, filenames):
        stderr.write("%r ...\n" % (pagename or filename))

        # Define a pagename if missing.

        pagename = pagename or split(filename)[-1]
        metadata.set("pagename", pagename)

        # Read either from a filename or using a pagename.

        if filename:
            pagetext = source.readfile(filename)
        else:
            pagetext = source.readpage(pagename)

        # Parse the page content.

        p = make_parser(metadata)
        d = parse(pagetext, p)

        if flags["macros"]:
            p.evaluate_macros()

        p.update_metadata(metadata)

        # Obtain a serialiser using the configuration.

        serialiser = make_serialiser(metadata)
        outtext = serialise(d, serialiser)

        # Show a document tree for debugging purposes, if requested.

        if tree:
            printstr(outtext)
            continue

        # With a theme, apply it to the text.

        if theme:
            outtext = theme.apply(outtext)

        # Show the result if the output context cannot be written to.
        # Otherwise, write to the output context.

        output = metadata.get_output()

        if not output.can_write():
            printstr(outtext)
        elif output.writepage(outtext, pagename):
            stderr.write("%r\n" % pagename)

            copy_attachments(p, source, output, all=True)
        else:
            not_written.append(pagename)
            stderr.write("%r not written!\n" % pagename)

    # Install any theme resources.

    if theme:
        theme.install_resources()

    # Report unwritten pages.

    if not_written:
        stderr.write("Not written: %s\n" % ", ".join(map(repr, not_written)))

def show_help(progname):

    """
    Write the help text using 'stderr', with 'progname' substituted into the
    usage line as the name of the program.
    """

    usage = help_text % progname
    stderr.write(usage)

# The help text, with a single %s placeholder for the program name. Every
# option recognised by the main program should have an entry here.

help_text = """\
Usage: %s [ <options> ] ( --all | [ -f] <filename>... )

Input file options:

--all               Detect all document files in the specified input directory
-f                  Indicate a specific file, needed if --interwiki, --mapping
                    or --variables immediately precedes the filename

Input options:

--common            Obtain attachments from a common directory for all pages,
                    rather than each page having its own subdirectory of a
                    top-level attachments directory; equivalent to
                    --common-attachments
--common-attachments
                    Equivalent to --common
--input-dir         Indicate an input directory containing document files
--input-dir-type    Indicate the type of input directory involved
                    (default: directory)
--input-encoding    Indicate the character encoding used in document files
--input-format      Indicate the format of the parsed documents
                    (default: moin)
--input-page-sep    Indicate the separator used in filenames to encode
                    hierarchical relationships (subpages and descendant pages)
--pagename          Indicate the page name corresponding to an indicated
                    filename, with each successive instance of this option
                    corresponding to each successive filename instance

Output options:

--bundle            Bundle resources such as stylesheets within every document,
                    useful for publishing documents that need to be copied or
                    distributed individually
--document-index    Provide a "DocumentIndex" filename to be used in links in
                    HTML format output, useful for local file browsing instead
                    of Web-published content
--format            Indicate the format to be used for serialised documents;
                    equivalent to --output-format
                    (default: html)
--fragment          Indicates that an output fragment, not an entire document,
                    is to be generated, skipping any theming activities
--no-inline         Suppress inline objects in serialised documents, linking to
                    separate objects instead
--output-dir        Indicate an output directory to contain serialised document
                    files
--output-encoding   Indicate the character encoding used in serialised document
                    files
--output-format     Indicate the format to be used for serialised documents;
                    equivalent to --format 
                    (default: html)
--output-page-sep   Indicate the separator used in filenames to encode
                    hierarchical relationships (subpages and descendant pages)
--theme             Indicate a theme for serialised documents, typically
                    requiring an output directory to be useful
--tree              Produce a document tree representation on standard output
                    instead of generating output files
--wikiwords         Recognise wikiwords including suppressed wikiwords, the
                    latter being of the form !WikiWord and, in conventional Moin
                    markup, suppressing the linking of WikiWord to another page;
                    unlike Moin, bare wikiwords do not produce links with this
                    tool unless this option is indicated

Configuration options:

--attachments-dir   Indicate the name of the attachments directory
                    (default: attachments)
--interwiki         Indicate a name and corresponding URL pairs to be used to
                    translate interwiki links; equivalent to --mapping
--macros            Perform macro evaluation/expansion before serialising
                    documents
--mapping           Indicate name and corresponding URL pairs to be used to
                    translate interwiki links; equivalent to --interwiki
--root              Indicate the root page name to be used
                    (default: FrontPage)
--variables         Indicate name and corresponding value pairs to be used to
                    translate variables

Other options:

--help              Show this help text and exit

"""

if __name__ == "__main__":
    try:
        main()
    except errors.ProcessingError as exc:

        # Report the failure in the same way as the other error conditions:
        # using the error stream, so that the message is kept apart from any
        # document output, and with a non-zero exit status so that callers
        # can detect the failure.

        stderr.write("%s\n" % exc)
        sys.exit(1)

# vim: tabstop=4 expandtab shiftwidth=4
