#!/usr/bin/python2

# Licence: GPL 2 (c) Marc MERLIN

"""Extracts a blog from an archive directory of blogger posts, using a pattern
(perma string or perma unix time (old format))

Usage:
  integrate_blog.py blogname|permatime [file to add to]

    if the destination file is not given, output is sent to STDOUT
"""

__author__ = 'marc_soft@merlins.org'

import sys
import re
import glob
import os
import extract_blog_entry

# Verbosity threshold handed to MakeVerbose(); messages whose level is above
# this value are not printed.
VERBOSE = 0
# Root of the blog archive.
BLOGDIR = "/home/merlin/blog" # CHANGEME
# Glob for the per-month directories (YYYY/MM) holding the blog html files.
BLOG_ITEMS_PAT = "%s/[0-9][0-9][0-9][0-9]/[0-9][0-9]" % BLOGDIR
# Marker lines looked for in the destination file. Lines are compared whole,
# hence the trailing newlines.
INSERT_MARKER = "<!-- Insert new blog here -->\n"
INSERT_LI_HEADER = "<!-- Insert new li header here -->\n"
INSERTPERMA = "Insert new perma name here\n"
# True: new content is written before its marker line; False: after it.
TOP_ADD = True

# Prefix that precedes a unix time perma id inside a blog file. In my blog
# template, I match
# <A HREF="/cgi-bin/blogsnippet?perma=113199272368300001>
# Adjust this pattern to your template if your blogs also contain unix times
# or leave to the default if you don't use that (it'll just be ignored)
BLOGUNIXTIMEPERMPAT = 'perma='	# CHANGEME

def die(msg=""):
  """Print msg on stderr and terminate the script with exit status 255."""
  print >> sys.stderr, msg
  # Same effect as sys.exit(255), which raises this very exception.
  raise SystemExit(255)

def Usage(code, msg=''):
  """Print msg and the module docstring, then exit with status `code`.

  A non-zero code sends the text to stderr, a zero code sends it to stdout.
  """
  fd = sys.stderr if code else sys.stdout
  # The extra "\n" leaves a blank line between the message and the usage text.
  for text in (msg+"\n", __doc__):
    print >> fd, text
  sys.exit(code)

# The logger is built as a closure so that the function itself (with the
# verbosity threshold baked in) can be handed to other objects.
def MakeVerbose(mast_level=1):
  """Return a Verbose(msg, level=1) function bound to mast_level.

  The returned function prints msg on stderr when level does not exceed
  mast_level, and stays silent otherwise.
  """
  def Verbose(msg, level=1):
    if mast_level >= level:
      print >> sys.stderr, msg
  return Verbose

def RotateFile(file):
  """Rename `file` to its next ".oldN" backup name, shifting older backups.

  A name without a ".oldN" suffix gets ".old1" appended; a name ending in
  ".oldN" becomes ".old<N+1>". The target name is rotated first (recursively),
  so an existing backup is moved up instead of being overwritten.

  Args:
    file: path of the file to move out of the way.

  Returns:
    `file` when it existed and was renamed, None when there was nothing to
    rotate.

  Raises:
    OSError: if os.rename() fails.
  """
  if not os.path.exists(file):
    Verbose ("%s doesn't exist, rotating to that" % file, 4)
    return

  # The dot is escaped and the match is anchored at the end of the name, and
  # only that trailing suffix is rebuilt. An unescaped "." would match any
  # character, and an unanchored substitution would also rewrite a ".oldN"
  # found earlier in the path (e.g. in a directory name).
  suffix = re.search(r"\.old(\d+)$", file)
  if suffix is None:
    newfile = file + ".old1"
  else:
    newfile = "%s.old%d" % (file[:suffix.start()], int(suffix.group(1)) + 1)

  # Free the target name before taking it.
  RotateFile(newfile)
  Verbose("rotate %s to %s" % (file, newfile), 4)
  os.rename(file, newfile)
  return file
  


# takes pattern [outputfile] (default to stdout)
def main():
  filename = ""

  try:
    pattern = sys.argv[1]
  except IndexError:
    Usage (1, "pattern required")

  if len(sys.argv) == 3:
    output_html = sys.argv[2]
    output_html_new = re.sub(r'\.html$', '.new.html', output_html)
    Verbose("Adding blog to %s, writing to %s" % (output_html, output_html_new))
  else:
    output_html = ""

  if len(sys.argv) > 3:
    Usage(1, "only one or two arguments accepted")

  # Separate unix time patterns from filenames, allow dates back when unix
  # time was only 17 digits
  # note that matching blog posts by the old unix time perma date doesn't work
  # on new blogs unless your template contains old permas somewhere. 
  # Adjust BLOGUNIXTIMEPERMPAT as necessary
  if re.search(r'^\d{17,18}$', pattern):
    Verbose("Pattern %s is unix time" % pattern)

    # yes, yes, I'm forking a shell command, sue me :)
    # (much shorter than writing the equivalent in python)
    f = os.popen("grep '%s%s[^0-9]' %s/*.html" % (BLOGUNIXTIMEPERMPAT, pattern, BLOG_ITEMS_PAT))
    try:
      filename = f.readlines()[0]
      # strip end for the grep text and newline
      filename = (re.sub(':.*', '', filename))[0:-1]
    except IndexError:
      die("Couldn't find pattern " + pattern)
  else:
    # Remove an optional trailing .html in the pattern looked for
    pattern = re.sub(r'\.html$', '', pattern)
    Verbose("Pattern %s is filename" % pattern)
    # even though it's unconditionally re-added here
    filename = glob.glob(BLOG_ITEMS_PAT + "/" + pattern + ".html")
    if len(filename) == 0:
      die("No blog match for "+pattern)
    elif len(filename) > 1:
      die("bug: %d matches for %s" % (len(filename), pattern))
    filename = filename[0]

  Verbose("found blog in " + filename)
  
  # let the default handlers catch errors here
  file = "".join(open(filename).readlines())

  found_marker = False
  html = extract_blog_entry.ScrapeHtml(pattern, Verbose)
  html.feed(file)
  blog_entry = "\n".join(html.capture)

  if not output_html:
    print blog_entry
  else:
    src = open(output_html)
    dest = open(output_html_new, "w", 0644)

    #while (line=src.readline()):
    line = True
    while (line):
      line = src.readline()

    
      if line == INSERTPERMA:
	if TOP_ADD:
	  dest.write(pattern+"\n")
	  dest.write(line)
	else:
	  dest.write(line)
	  dest.write(pattern+"\n")
      elif line == INSERT_LI_HEADER:
        entry = '<li><a href="#%s">%s</a>\n' % (pattern, html.blog_title)
	if TOP_ADD:
	  dest.write(entry)
	  dest.write(line)
	else:
	  dest.write(line)
	  dest.write(entry)
      elif line == INSERT_MARKER:
	found_marker = True
	# do we add at the top or the bottom?
	if TOP_ADD:
	  dest.write("<br><br>\n" + blog_entry)
	  dest.write("\n\n" + line)
	else:
	  dest.write(line + "\n")
	  dest.write("<br><br>\n" + blog_entry)
      else:
	dest.write(line)

    src.close()
    dest.close()

    RotateFile(output_html)
    os.rename(output_html_new, output_html)

    if not found_marker:
      die("Did not find %s in %s" % (INSERT_MARKER, output_html_new))


if __name__ == "__main__":
  # Verbose has to be bound as a module-level name before main() runs: both
  # main and RotateFile call it as a global.
  Verbose = MakeVerbose(mast_level=VERBOSE)
  main()
