########################################################################
#       KazoSQL - A Kazorum (DAJ_Glass-themed) to MySQL-converter      #
#            Copyright (C) 2007 Nindra (nindrag a gmail.com)           #
#                                                                      #
# This program is free software: you can redistribute it and/or modify #
# it under the terms of the GNU General Public License as published by #
# the Free Software Foundation, either version 3 of the License, or    #
# (at your option) any later version.                                  #
#                                                                      #
# This program is distributed in the hope that it will be useful,      #
# but WITHOUT ANY WARRANTY; without even the implied warranty of       #
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the        #
# GNU General Public License for more details.                         #
#                                                                      #
# You should have received a copy of the GNU General Public License    #
# along with this program.  If not, see <http://www.gnu.org/licenses/>.#
########################################################################
from BeautifulSoup import BeautifulSoup
import elementtree.ElementTree as ET
import sys
import md5
import re
import sets
import MySQLdb

# Index into sys.argv of the input file being processed. argv[0] is the
# script itself, so counting starts at 1; postextract() advances it.
x = 1

# One connection and one cursor, opened at import time and shared by the
# whole script through these module-level names.
db = MySQLdb.connect(
    host="127.0.0.1",
    user="user",
    passwd="pw",
    db="kazosql",
)
dbc = db.cursor()

def uniquer(seq, idfun=None):
    """Return the items of seq as a list with duplicates removed.

    seq   -- any iterable of items.
    idfun -- optional callable mapping an item to the hashable key that
             decides whether two items count as duplicates.  Defaults to
             the item itself.

    Order is preserved: the first item producing a given key is kept and
    every later item with the same key is dropped.
    """
    if idfun is None:
        def idfun(item):
            return item
    seen = set()
    result = []
    for item in seq:
        marker = idfun(item)
        if marker in seen:
            continue
        seen.add(marker)
        result.append(item)
    return result

# One tag: a "<", everything up to the nearest following ">" (newlines and
# further "<" characters included), and that ">".
_TAG_RE = re.compile(r"<[^>]*>")


def StripTags(text):
    """Return text with every "<...>" span removed.

    text -- the string to clean.

    Scanning left to right, each "<" is paired with the nearest ">" after
    it and the whole span is deleted.  A "<" that is never closed, and any
    stray ">", are left in place.  The work is done in a single pass over
    the string rather than by rescanning it from the start for each tag.
    """
    return _TAG_RE.sub("", text)

def CreatePost(ID, post, poster, date, category, thread):
    """Insert one post as a row of the ``posts`` table.

    ID       -- value for the ``id`` column.
    post     -- post content; also the input of the stored MD5 digest.
    poster   -- value for the ``poster`` column.
    date     -- value for the ``date`` column.
    category -- value for the ``category`` column.
    thread   -- value for the ``thread`` column.

    The ``md5`` column receives the hex MD5 digest of the unescaped post
    content.  All values are handed to the driver as query parameters, so
    quoting and escaping are done by MySQLdb rather than by string
    formatting.

    Inserting is best-effort: a database error or warning is reported on
    stderr and otherwise ignored, so one bad row does not stop the run.
    Returns None.
    """
    digest = md5.new(post).hexdigest()
    sql = ("INSERT INTO posts (id,content,poster,date,category,thread,md5) "
           "VALUES (%s,%s,%s,%s,%s,%s,%s)")
    try:
        dbc.execute(sql, (ID, post, poster, date, category, thread, digest))
    except (MySQLdb.Error, MySQLdb.Warning):
        # sys.exc_info() instead of "except ... as e" so the same source
        # works on every Python 2 release.
        sys.stderr.write("KazoSQL: could not insert post %s: %s\n"
                         % (ID, sys.exc_info()[1]))
    
def postextract(soup):
    """Parse one saved thread page and insert every post it contains.

    soup -- the page's HTML in a form BeautifulSoup() accepts; the script
            passes an open file object.

    Side effects: increments the module-level counter x (the top-level
    loop of this script relies on that to advance through sys.argv) and
    calls CreatePost() once per extracted post.  Returns None.

    A page with fewer than three "mainmenu" links is skipped without
    inserting anything.  The ID, poster, date and post lists are matched
    up by position; if the page yields more posts than IDs, posters or
    dates, the resulting IndexError is not caught.
    """
    global x
    x = x + 1
    soup = BeautifulSoup(soup)

    # The second and third "mainmenu" links supply category and thread.
    crumbs = soup.body.fetchNext(name='a', attrs={'class': 'mainmenu'},
                                 limit=3)
    if len(crumbs) < 3:
        return
    category = StripTags(str(crumbs[1]))
    thread = StripTags(str(crumbs[2]))

    # Post IDs: from each matching link's markup keep what lies between
    # character offset 26 and the first ".".
    # NOTE(review): offset 26 presumably skips '<a href="artemisfowl-post'
    # plus one separator character -- confirm against a sample page.
    links = soup.body.fetchNext(
        'a', attrs={'href': re.compile("artemisfowl-post*")})
    ids = []
    for link in links:
        markup = str(link)
        ids.append(markup[26:markup.find('.')])

    # Poster names.  Every entry is stripped of tags here; previously the
    # loop index was not reset after the ID loop, so the entries that were
    # actually inserted still carried their surrounding markup.
    posters = [StripTags(str(tag))
               for tag in soup.body.fetchNext(attrs={'class': "name"})]

    # Dates: only every third "postdetails" element is used, and of its
    # tag-stripped text only characters 8..32 are kept.
    details = soup.body.fetchNext(attrs={'class': 'postdetails'})
    dates = [StripTags(str(details[i]))[8:33]
             for i in range(0, len(details), 3)]

    # Post bodies: of the elements matching these layout attributes, every
    # second one starting with the third is kept, markup included.
    tables = soup.body.fetchNext(attrs={'width': '100%', 'border': '0',
                                        'cellspacing': '0',
                                        'cellpadding': '0'})
    bodies = [str(tables[i]) for i in range(2, len(tables), 2)]

    for i, body in enumerate(bodies):
        CreatePost(ids[i], body, posters[i], dates[i], category, thread)


# Convert every file named on the command line, printing "<n> / <total>"
# and the file name before each one.  The loop iterates over sys.argv
# directly, so it does not depend on the global counter x.
total = len(sys.argv) - 1
for number, path in enumerate(sys.argv[1:]):
    print("%d / %d" % (number + 1, total))
    print(path)
    f = open(path, 'r')
    try:
        postextract(f)
    finally:
        # Close the file even if parsing or inserting raised.
        f.close()
    # Make this file's inserts permanent.  Per the MySQLdb documentation
    # commit() does nothing on tables without transaction support; on
    # transactional tables, uncommitted rows would be lost at exit.
    db.commit()