A small freedom area.

This blog

Thu 17 Mar 2011

blog, prog, python, static blog, kiss, first

Getting rid of PHP and MySQL on my server was one of my quests, and rewriting my old French blog was my first assignment in fulfilling it. Since this is now done, I'll share its source code in this first post.

So this is the direct continuation of my blog, where I'll mostly talk about various technical and fun nerdy stuff. You will also notice my English is even worse than my approximate French, but I hope it will still be readable. I will try to rewrite the most interesting posts from pilule-rouge.net before I finally drop it.

Speaking of the blog features, here is an exhaustive list:

- posts written in Markdown and rendered as static HTML pages
- tags, each with its own index page
- paginated indexes (10 posts per page)
- an RSS feed with the 10 latest posts
- syntax highlighting through Pygments
- a small Markdown extension to insert code from local files

And that's all. No, there is no comment system, but you can contact me on IRC/jabber/mail (see the page footer for more information).

Here is the simple Python script I use:

#!/usr/bin/env python2

import glob, os, sys, unicodedata, markdown, time, urllib, email.utils, re, shutil
from pygments.formatters import get_formatter_by_name

TPL_BASE = '''<!doctype html>
<html>
 <head>
  <title>%(title)s</title>
  <link rel="icon" type="image/png" href="/favicon.png" />
  <meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
  <link href="/rss.xml" rel="alternate" type="application/rss+xml" title="blog.pkh.me" />
  <link rel="stylesheet" type="text/css" href="/style.css" />
  <link rel="stylesheet" type="text/css" href="/pygments.css" />
  <meta name="keywords" content="%(keywords)s" />
  <meta name="viewport" content="width=device-width" />
 </head>
 <body>
 <header><a href="/index.html">A small freedom area.</a></header>
  <div id="content">%(content)s</div>
 <footer>
  <a href="http://ubitux.fr">www/misc</a>
  | mail+jabber: <i>u pkh.me</i>
  | irc: <i>ubitux@<a href="http://freenode.net">freenode</a>/<a href="http://yozora-irc.net">yozora</a></i>
 </footer>
 </body>
</html>'''

TPL_POST = '''<h1><a href="#content">%(title)s</a></h1>
<p class="date">%(date)s</p>
%(tags)s
<article>%(content)s</article>
<p id="idxurl"><a href="/index.html">index</a></p>'''

TPL_RSS = '''<?xml version="1.0" encoding="utf-8"?>
<rss version="2.0">
 <channel xmlns:atom="http://www.w3.org/2005/Atom">
  <atom:link href="http://blog.pkh.me/rss.xml" rel="self" type="application/rss+xml" />
  <title>A small freedom area RSS</title>
  <description>Default feed for blog.pkh.me</description>
  <link>http://blog.pkh.me/</link>
%s
 </channel>
</rss>'''

TPL_RSS_ITEM = '''<item>
 <guid>%(guid)s</guid>
 <link>%(link)s</link>
 <title>%(title)s</title>
 <pubDate>%(date)s</pubDate>
 <description>%(desc)s</description>
</item>'''

# Map an index base name and a page number to its output HTML path; page 1 of
# the main index is the site root index.html.
def get_page_name(base, n):
    if n == 1 and base == 'index':
        return 'index.html'
    return 'x/%s-p%d.html' % (base, n) if n != 1 else 'x/%s.html' % base

# Minimal HTML escaping for strings inserted into the templates.
def escape(s):
    return s.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;').replace('"', '&quot;').replace("'", '&#39;')

# Build the tags line of an entry; the currently browsed tag (if any) is not linked.
def get_tag_html(tags, current=None):
    tdata = []
    for tag in tags:
        if tag == current:
            tdata.append(tag)
        else:
            tdata.append('<a href="/x/index-%s.html">%s</a>' % (tag, tag))
    return '<p class="tags">%s</p>\n' % ', '.join(tdata)

# Return every tag sorted by decreasing usage, as a comma separated string
# (used for the <meta> keywords of the main index).
def get_all_tags(data):
    tags = {}
    for entry in data:
        for tag in entry['tags']:
            tags[tag] = tags.get(tag, 0) + 1
    tags_list = [item[0] for item in sorted(tags.items(), key=lambda x: x[1])][::-1]
    return ', '.join(tags_list)

# Write the paginated index pages (the global index, or a per-tag index when
# tag is set) and return the list of files written.
def write_html_index(data, tag=None):
    files = []
    title = 'ubitux/blog'
    bname = 'index'
    data  = data[::-1]
    if tag:
        bname += '-' + tag
        title += '/' + tag
        data = filter(lambda e: tag in e['tags'], data)
    else:
        all_tags = get_all_tags(data)

    per_page = 10
    pages = [data[i:i+per_page] for i in range(0, len(data), per_page)]
    for (n, entries) in enumerate(pages, 1):
        fname = os.path.join('www', get_page_name(bname, n))
        print('   writing %s' % fname)

        pdata = []
        for i in range(1, len(pages) + 1):
            if i == n:
                pdata.append('%d' % i)
            else:
                pdata.append('<a href="/%s">%d</a>' % (get_page_name(bname, i), i))
        raw = '<p class="pages">%s</p>\n' % ' '.join(pdata)

        for entry in entries:
            raw += '''<h1><a href="/%(page)s">%(title)s</a></h1>
                      <p class="date">%(date)s</p>
                      <p>%(preview)s</p>''' % entry
            raw += get_tag_html(entry['tags'], tag)

        files.append(fname)
        open(fname, 'w').write(TPL_BASE % {
            'keywords': tag if tag else all_tags,
            'title':    title,
            'content':  raw,
        })

    return files

# Render one raw post to HTML; the post timestamp is taken from the numeric
# prefix of the input file name.
def write_page(ifile, ofile=None, postid=-1):
    ts = int(re.split(r'[^0-9]', ifile.rsplit('/', 1)[-1], 1)[0] or '0')
    datefmt = time.strftime('%a %d %b %Y', time.gmtime(ts))
    meta, rawcontent = open(ifile, 'r').read().split('\n\n', 1)

    title, tags = None, []
    for line in meta.splitlines():
        k, v = line.split(':', 1)
        v = v.strip()
        if k == 'title':
            title = escape(v)
        elif k == 'tags':
            tags = [t.strip() for t in v.split(',')]

    if not ofile:
        ofile = unicodedata.normalize('NFKD', title.decode('utf-8')).encode('ascii', 'ignore')
        ofile_rel = 'p/%d-%s.html' % (postid, '-'.join(ofile.strip().lower().replace('/', ' ').split()))
        ofile = os.path.join('www', ofile_rel)
    else:
        ofile_rel = ofile

    print('   writing %-35s -> %s' % (ifile, ofile))

    page_info = {
        'ofile':     ofile,
        'title':     title,
        'keywords':  ', '.join(tags),
        'ts':        ts,
        'date':      datefmt,
        'preview':   ' '.join(rawcontent.split()[:20]) + '...',
        'page':      urllib.quote(ofile_rel),
        'tags':      tags,
        'content':   TPL_POST % {
            'title':   title,
            'date':    datefmt,
            'tags':    get_tag_html(tags),
            'raw_url': '/' + ifile,
            'content': markdown.markdown(rawcontent.decode('utf-8'),
                                         ['codehilite', 'codeinsert']).encode('utf-8'),
        }
    }
    open(ofile, 'w').write(TPL_BASE % page_info)
    return page_info

# Rebuild the whole site: every post, the indexes, the stale files cleanup,
# the Pygments stylesheet and the RSS feed.
def write_pages(raws):
    fulltaglist = set()
    index_data = []

    print(':: write pages')
    for i, raw in enumerate(raws):
        page_info = write_page(raw, postid=i)
        index_data.append(page_info)
        fulltaglist |= set(page_info['tags'])

    print('\n:: write indexes')
    files = write_html_index(index_data)
    for tag in fulltaglist:
        files += write_html_index(index_data, tag)

    print('\n:: cleanup')
    # drop the index pages on disk that are not generated anymore
    for f in set(files + ['www/x/index.html']) ^ set(['www/index.html'] + glob.glob('www/x/*.html')):
        print('   rm %s' % f)
        os.unlink(f)
    # same thing for the post pages in www/p/
    plist = ['www/p/index.html'] + [info['ofile'] for info in index_data]
    for f in set(plist) ^ set(glob.glob('www/p/*.html')):
        print('   rm %s' % f)
        os.unlink(f)

    print('\n:: update pygments CSS')
    csshilite = open('www/pygments.css', 'w')
    csshilite.write(get_formatter_by_name('html', style='paraiso-dark').get_style_defs('.codehilite'))
    csshilite.close()

    print('\n:: RSS')
    rss_content = ''
    rss = open('www/rss.xml', 'w')
    for item in index_data[::-1][:10]:
        link = 'http://blog.pkh.me/' + item['page']
        info = {
            'guid':       link,
            'short-link': item['page'],
            'link':       link,
            'title':      item['title'],
            'date':       email.utils.formatdate(item['ts']),
            'links':      '',
            'desc':       escape(item['preview']),
        }
        rss_content += TPL_RSS_ITEM % info
    rss_content = TPL_RSS % rss_content
    rss.write(rss_content)
    rss.close()

if len(sys.argv) == 3:
    write_page(sys.argv[1], ofile=sys.argv[2])
elif len(sys.argv) == 1:
    write_pages(sorted(glob.glob('raw/*.raw')))
else:
    print('usage:\n\t%s [<input> <output>]' % sys.argv[0])
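
For reference, a raw post is a plain text file: write_page() expects a title:/tags: header block, a blank line, then the Markdown body, and the post date comes from the numeric prefix of the file name (something like raw/1300000000-awesome-post.raw). A made-up example:

title: My awesome post
tags: prog, python

Some **Markdown** content, with code blocks highlighted through codehilite:

    :::python
    print('hello world')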

And the code insertion module for Markdown:

import os
import markdown

# Replace every "    ||lang:file" line of a post with a "    :::lang" hint
# followed by the content of www/<file> indented by 4 spaces, so codehilite
# highlights it like a regular code block.
class CodeInsertPreprocessor(markdown.preprocessors.Preprocessor):
    def run(self, lines):
        new_lines = []
        for line in lines:
            if not line.startswith('    ||'):
                new_lines.append(line)
                continue
            lang, fname = line[4+2:].split(':')
            new_lines += ['    :::' + lang]
            f = open(os.path.join('www', fname), 'r')
            new_lines += ['    ' + l.rstrip() for l in f.readlines()]
            f.close()
        return new_lines

class CodeInsertExtension(markdown.Extension):
    def extendMarkdown(self, md, md_globals):
        md.preprocessors.add('codeinsert', CodeInsertPreprocessor(md), '<reference')

def makeExtension(**kwargs):
    return CodeInsertExtension(**kwargs)
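
With this extension, a post can embed a code block straight from a file under www/ with a single line (the path here is made up for the example):

    ||python:misc/hello.py

The preprocessor replaces that line with a :::python hint followed by the content of www/misc/hello.py, indented by four spaces, so codehilite highlights it like any other indented code block.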

And ./add:

#!/bin/sh

set -x
[ $# -ne 1 ] && echo "usage: $0 rawfile" && exit 1
cp -n "$1" "raw/$(date +%s)-$(basename "$1").raw" && rm -f "$1"

And finally, how to use it:

% ./run.py awesome-post draft.html                          # preview a post while writing it
% ./add awesome-post                                        # add awesome-post to the raw/ directory with the current date
% ./run.py                                                  # reload the website content
% ./run.py raw/1234567890-awesome-post.raw p/42-foobar.html # I just noticed and fixed a typo so I'm reloading that page only
