#!/usr/bin/python
# -*- coding: UTF-8 -*-

"""Inspired by
http://www.ducdigital.com/2009/11/24/massive-download-from-pixiv/
   modified by Nandaka. http://nandaka.wordpress.com
   further modified by Kaens Bard http://kaen.su
Works well in Python 2.6.x and 2.7.x

Get and install Setup Tools:
  for Python 2.6.x: http://pypi.python.org/packages/2.6/s/setuptools/setuptools-0.6c11.win32-py2.6.exe
Run cmd.exe, CHDIR to your python folder /Lib/site-packages and run
	easy_install.py beautifulsoup4
and
	easy_install.py http://wwwsearch.sourceforge.net/mechanize/src/mechanize-0.2.3.tar.gz

Usage:
   set your account language to Japanese (for now their English translation seems shaky, will fix later and don't care about Chinese)
   EITHER:
     create a text file in UTF-8 w/o signature in scripts folder, list all the links you want mass-downloaded/updated there, each on its own line
   OR:
     just write the links directly in command line
   setup config.ini for startpage and quickcheck OR set quickcheck in command line (+q on, -q off)
   run the script with the file name (or many) as parameter(s)
   you can add links as parameters, they'll be processed

The links accepted in the list file have been tested as working for:
  - user gallery pages
  - your own bookmark pages (make sure you've set up config.ini for your own profile for that)
  - tag searches
  - ranking pages
  - user stacc→user gallery (also accepts just the user ID as list name, checks for it are done after checks for list file name)
  - user profile → user gallery

Additionally, it is possible to enumerate a user's or your own okiniiri list from the profile into a local file in list format.
If you intend to use some Windows Notepad to create it, don't use unicode characters in links
"""

import re
import os
import sys
import codecs
import mechanize
import urllib2
import time
import random
import calendar
from datetime import date,datetime
from mechanize import Browser,ProxyHandler
from bs4 import BeautifulSoup

import ConfigParser

version = '2014-12-03'
#-------Defaults
url = 'http://www.pixiv.net/'
proxyAddress = ''
proxies = {}
username = ''
password = ''
useragent = 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:16.0) Gecko/16.0 Firefox/16.0'
debugHttp = False
numberOfPage = 0
startPage = 1
quickCheck = True
useRobots = False
genericfilename_format = r"_%pageline%/%artist% (%artist_id%)/%image_id% - %title%"
member_illust_format = r"(%artist_id%)/%image_id% - %title%"
bookmark_new_illust_format = r"[お気に入りtracker]/%image_id% (%artist_id%) - %title%"
response_format = r"%inurl_id% res/%image_id% (%artist_id%) - %title%"
ranking_format = r"[%today%'s %inurl_id%'s best]/[#%num%] %image_id% (%artist_id%) - %title%"
description_format = u"Title:%title%\\n%Tags:%tags%\\nCommentary:%remarks%"
tokentag = r""
overwrite = False
logFile = ""
descript_ion = False
description_file = u"descript.ion"
blacklist = r"[腐 【腐 腐】 腐] 腐注 腐向 ※腐 BL注" #yaoi cut
min_id = 0 #downto which id to longdownload, useful if you missed the 2000 latest okiniiri limit

br = Browser()
pr = None

fQuickCheck = False
curPage = 0
ugoira = False
noOfImages = 0
debugid = 0

#-----------Start Download, input: image ID, type: VOID, save to disk
def downloadImage(id):
	global br
	global fQuickCheck
	global curPage
	global ugoira
	global tokentag
	if quickCheck and fQuickCheck:
		return
	print "\nGetting file id: " + str(id) + "\t\t",
	tickity=0 #DEBUG
	while True:
	 try:
		mediumPage = br.open(url+"member_illust.php?mode=medium&illust_id="+str(id),timeout=10)
		if logFile == "": print chr(8)+'`',
		else: print '`',
		ttt=mediumPage
		mediumPage = mediumPage.read()
		#time.sleep(random.random()*10+0.4) #i'm a human. human, not a robot. okay? right.
		if logFile == "": print chr(8)+'-',
		else: print '-',
		parseTitle = BeautifulSoup(mediumPage)
		if logFile == "": print chr(8)+'.',
		else: print '.',
		tickity += 1
		if mediumPage.find(r"例外エラーが発生しました")>-1:
			print "...pixiv: unexpected error occurred, skipping..."
			raise
		elif mediumPage.find(r"該当作品は削除されたか、存在しない作品IDです。")>-1:
			print "...pixiv: submission taken down by artist"
			return
		elif mediumPage.find(r"マイピクにのみ公開されています")>-1:
			print "...pixiv: submission MyPixiv-only"
			return
		#DEBUG
		if tickity==20:
	 		print "-wrote tickity "+str(id)+" file-",
			lolfile=open('tickity '+str(id),'wb+')
			lolfile.write(parseTitle)
			lolfile.close()
		#/DEBUG
		break
	 except:
	 	if id == debugid:
	 		print 1,
	 	else:
	 		print '.',
	 	tickity += 1
	 	if tickity == 20:
	 		print "-wrote tickity "+str(id)+" file-",
			lolfile=open('tickity '+str(id),'wb+')
			lolfile.write(mediumPage)
			lolfile.close()
		time.sleep(5)
	
	anilookup = parseTitle.find('div',{'class':'_ugoku-illust-player-container'})
	ugoira = anilookup != None

	if id == debugid:
		f=open("logofpages","wb+")
		f.write(parseTitle)
		f.close()

	#parse artist
	try:
		artist = parseTitle.find('div',{'class':'_unit profile-unit'}).h1.contents[0]
		print "artist: ",artist
	except UnicodeError:
		print "(not supported by console)"
	except LookupError,AttributeError:
		if parseTitle.find("エラーが発生しました".decode('utf8')):
			print "...denied by pixiv server, skipping"
		else:
			print "...Oops. Submission was taken down by the artist while downloading the rest OR another error occurred."
			f=open("submission %s down" % (id),"a+")
			f.write(mediumPage)
			f.close()
			return
	except:
		print "...artist parsing failed, SKIPPING"
		time.sleep(5)

	#parse commentary
	#print "Commentary:",  #better not >_>
	try: artist_id = parseTitle.find('div',{'class':'_unit _work-detail-unit'}) .find('a',{'class':"tab-feed"})['href'] .split('/')[-1]
	except: print "artist_id not found:",parseTitle.find('div',{'class':'_unit _work-detail-unit'})
	try: works_caption = parseTitle.find('p',{'class':'works_caption'}).getText(separator=u'\\n')
	except: works_caption=u'n/a'

	#parse tags, for blacklist as well
	tagsline=u'' #for descript.ion
	tagslist=[]  #for log
	addtokentag=False #for the token tag
	try:
		for x in parseTitle.find('span',{'class':'tags-container'}).find_all(re.compile('a')):
			if not x.string: continue
			if x.string==u'*':
				tagsline+=x.string
			else:
				tagslist+=[x.string]
				tagsline+=x.string+u' '
				if not addtokentag:
					addtokentag = (tokentag==x.string)
	except:
		print "...tag parsing failed, retrying"
	if tagsline==u'': tagsline=u'(n/a)'
	print "Tags:",
	for x in tagslist:
		try:
			print x,
		except UnicodeError:
			print "-",
		except LookupError:
			print "-",
		for i in blacklist.decode("utf8").split(" "):
			if (len(i) > 0) and (x.find(i) >= 0):
				try:
				 print "blacklisted by %s, skipping..."%(i)
				except:
				 print "blacklisted, skipping..."
				return
	print ""

	#parse imagedate and number of manga pages
	manga = False
	classmeta = parseTitle.find('ul',{'class':'meta'})
	try:
		if classmeta != None:
			imagedate = classmeta.find(text=re.compile(r"(\d{4}年\d*月\d*日 \d{2}:\d{2}|\d*/\d*/\d{4} \d{2}:\d{2})".decode('utf8')))
		else:
			print "WARNING: Either pixiv changed page format or page not found"
		z = re.search(r"(\d{4})年(\d*)月(\d*)日 (\d{2}):(\d{2})".decode('utf8'),imagedate)
		if z: # Japanese date
			imagedate = datetime(int(z.group(1)),int(z.group(2)),int(z.group(3)),
			int(z.group(4)),int(z.group(5)))
		else:
			imagedate = work_info.find('ul',{'class':'meta'}).find_all('li',text=re.compile(r"\d{2}/\d{2}/\d{2} \d{2}:\d{2}".decode('utf8')))[0].split("\n")[0]
			z = re.search(r"(\d*)/(\d*)/(\d{4}) (\d{2}):(\d{2})".decode('utf8'),imagedate)
			if z: # American date
				imagedate = datetime(int(z.group(3)),int(z.group(1)),int(z.group(2)),
				int(z.group(4)),int(z.group(5)))
	except Exception,e:
		print "(a)",str(e),
		print "(timestamp not found, assuming now/UTC)",
		imagedate = datetime.utcnow()
	if classmeta != None:
		mlookup = classmeta.find(text=re.compile(r"複数枚投稿 \d*P".decode('utf8')))
	else:
		manga = False

	if ugoira:
		manga = False
	else:
	 if mlookup != None:
		manga = True
		mangapages = int(mlookup.split(" ")[1][:-1])
	 else:
		manga = False

	imagedates = imagedate.strftime('%Y-%m-%d %H:%M')
	print "Date:",imagedates,
	
	#parse title
	try:
#		for a in parseTitle.find_all('h1',{'class':'title'}): print a.text
		title = parseTitle.find_all('h1',{'class':'title'})[2].text
		print "title:",title
	except UnicodeError:
		print "(not supported by console)"
	except LookupError:
		print "(unknown console encoding)"
	except:
		title = "untitled"

	#parse actual image(s)
	if ugoira:
		z = re.search(r"pixiv\.context\.ugokuIllustFullscreenData.+(http:.+ugoira1920x1080\.zip)",mediumPage)
		if z:
			anilink = z.group(1).replace('\\',"")
		else:
			print "Failed to find the Cinematic zip, skipping"
			exit

	works_display = parseTitle.find('div',{"class":"works_display"})
 	if not manga:
 	 tickity = 0
	 if not ugoira:
 	  while 1:
	   try:
		viewPage = br.follow_link(url=works_display('a')[0]['href'])
		break
	   except Exception,e:
		if id == debugid:
		  	print "(b)",str(e)
	 	else:
	 		print '.',
	 	tickity += 1
	 	if tickity == 10:
	 		print "-wrote tickity "+str(id)+" file-",
			lolfile=open('tickity '+str(id),'wb+')
			lolfile.write(mediumPage)
			lolfile.close()
			if parseTitle.find("再度ログインしなおしてください".encode("utf8")):
		 		print "Attempting to re-login...",
		 		if login(username, password) == 0:
		 			print "success!"
		time.sleep(5)
	if manga:
		print "Getting manga,",mangapages,"pages..."
		imgList = []
		for i in range(mangapages):
			imgList.append("member_illust.php?mode=manga_big&illust_id="+str(id)+"&page="+str(i))
	elif ugoira:
		imgList = [anilink]
	else: #ergo, manga
		parser = BeautifulSoup(viewPage.read())
		imgList = parser('img')

	for imgFile in imgList: #each imgFile becomes a big page link-to-follow in case of manga ._.
		if quickCheck and fQuickCheck:
			break
		if ugoira:
			ext = re.sub(r'http:.*/(\d+_ugoira.*\.zip)',r'\1',anilink)
		elif manga:
			while 1:
			 try:
				viewPage = br.open(url+imgFile)
				parser = BeautifulSoup(viewPage.read())
				imgFileM = parser('img')[0]
				ext = os.path.basename(imgFileM['src'])
				break
			 except Exception,e:
				print "(c)",str(e),
				if str(e).find('global name')>-1:
					print "...skipping..."
					break
				if str(e).startswith('HTTP Error 404'):
		 			print "[404]The submission is rendered unloadable, skipping..."
		 			break
		 		if str(e).startswith('HTTP Error 400'):
		 			print "[400]The submission is rendered unloadable, skipping..."
		 			break
				time.sleep(5)
		else: #not manga or cine
			ext = os.path.basename(imgFile['src'])
		if ext.split('.')[0].startswith(str(id)):
			image_id = ext.split('.')[0]
			if manga:
				#for comfort browsing of manga stuff, zero-pad it to 2 digits:
				z = re.search(r'_p(\d*)',image_id)
				if z:
					image_id = re.sub(r'_p\d*','_p{0:02}'.format(int(z.group(1))),image_id)
			elif ugoira:
				image_id = re.sub(r'http://.*/(\d*)_ugoira.*',r'\1_ani',anilink)
			global _pager
			fileName = makeFilename(_pager, id, artist_id, artist, title, image_id, noOfImages, imagedates, addtokentag)
			fileName = fileName+"."+ext.split('.')[1].split('?')[0]
			print 'Saving to:',
			fileName = sanitizeFilename(fileName)
			try:
				print fileName
			except UnicodeError:
				print "(not supported by console)"
			except LookupError:
				print "(unknown console encoding)"
			if manga:
				dl(imgFileM['src'], fileName, viewPage.geturl(), imagedate)
			elif ugoira:
				dl(anilink, fileName, ttt.geturl(), imagedate)
			else:
				dl(imgFile['src'], fileName, viewPage.geturl(), imagedate)

			#descript.ion update tiem
			# assuming UTF-8 (so what if it doesn't work on most 2panel commanders, so what if it's not in the specs at http://jpsoft.com/ascii/descfile.txt? Who REALLY cares about this legacy? It just won't work otherwise, so there)
			# Using ^D the way Ghisler's Total Commadner 7.55a does--for description breaks, instead of what the specs say.
			# Likewise, EoF characters aren't specifically processed
			# Assuming available for writing
			# I hate everything about this implementation.
			if descript_ion:
				dfile = os.path.dirname(fileName)+'/'+description_file
				_descmagic='\x04\xC2\x0A' #writelines() will autoreplace \x0A with \x0D\x0A on w32 O_o
				if os.path.exists(dfile):
					descfile = open(dfile,'r')
					curdesc=descfile.readlines()
					descfile.close()
				else:
			 		curdesc=[]
				notyet=True
				for x in curdesc:
			 		if x.find(os.path.basename(fileName).encode('utf-8'))>-1:
						notyet=False
			 			break
				if notyet:
		 			print 'Updating descript.ion...'
			 		curdesc.append( makeDescription( os.path.basename(fileName), title, tagsline, works_caption ).encode('utf-8') + _descmagic )
					descfile = open(dfile,'w')
					descfile.writelines(curdesc)
					descfile.close()
					curdesc=[]
		ttt.close()

#-----------List all images
#@profile
def downloadAllImages(pager):
	global fQuickCheck
	global _pager
	_pager = pager
	print "Getting pages from ", pager
	fQuickCheck = False
	global startPage
	global curPage
	curPage = startPage
	hasMorePage = 1
	global noOfImages
	noOfImages = 1
	id = None
	previd = [0]

	weirdvar = 5
	relogined = 0
	while (hasMorePage != 0) and (weirdvar > 0):
		if quickCheck and fQuickCheck:
			break
		print "\nListing page #%d\t" % (curPage),
		while True:
		 try:
			listPage = br.open(pager+"&p="+str(curPage),timeout=10)
			if logFile == "": print chr(8)+'`',
			else: print '`',
			ttt=listPage
			listPage=listPage.read()
			ttt.close()
			if logFile == "": print chr(8)+'-',
			else: print '-',
			#time.sleep(random.random()*10+2) #netiquette wwwhy is it here at all
			parseList = BeautifulSoup(listPage)
			if logFile == "": print chr(8)+'.',
			else: print '.',
			break
		 except Exception, e:
		 	print "(d)",str(e)
		 	if str(e).find("403"):
		 		print "Attempting to re-login...",
		 		if login(username, password) == 0:
		 			print "success!"
		 	else:
		 		print '.',
				time.sleep(5)
		itsranking = False
		itsbookmarks = False
		illust_c = parseList.find('li',{'class':r'image-item'})
		if illust_c == None: #bookmarks?
			illust_c = parseList.find('div', {'class':re.compile(r'^display_works')})
			if illust_c == None:
				#if illust_c == None:
				#	illust_c = parseList.find('ul', {'class':re.compile(r'image-items^')})
				illust_c = parseList.find('div', {'class':re.compile('^ranking-items')})
				if illust_c == None: #ranking?
					print "Unknown webpage design, ask the dev to support it"
					return
				else:
					itsranking = True
					print "Parsed as ranking"
			else:
				itsbookmarks = True
				print "Parsed as bookmarks"
		else:
			print "Parsed as generic"

		if illust_c != None: #found
			#if itsranking: illust = illust_c.find_all('a',href=re.compile(r'illust_id=\d*'))
			##else:
			illust = parseList.find_all('a',{'class':re.compile('^work')})
			for link in illust:
				if quickCheck and fQuickCheck:
					break
				try:
					id = link['href'].split('=')[2].split('&')[0].split('_')[0].split('?')[0]
				except IndexError:
					if link['href'].find('response.php'):
						continue
					print link['href'], 'has failed: unable to pick illust_id'
				if (id in previd) or (int(id) < 11):
					continue #skipping what's just in and incorrect id
				if int(id) < min_id:
					print "Lower id than minimum, stopping"
					break
				print "#"+ str(noOfImages)+':',
				downloadImage(id)
				previd.append(id)
				noOfImages = noOfImages + 1
			hasMorePage = len(illust)
			curPage += 1
			if numberOfPage == curPage:
				hasMorePage = 0
			elif pager.find("bookmark_new_illust.php") and (curPage > 100):
				hasMorePage = 0
			elif id != None:
			 if int(id) < min_id:
				hasMorePage = 0
		else:
			if relogined==0:
				print "\nNothing found on the page (div class * parsing error?), retrying.\n"
				weirdvar -= 1
				time.sleep(5)
				if weirdvar==0:
					print "This is taking too long, attempting re-login..."
					configBrowser()
		 			if login(username, password) == 0:
			 			print "success!"
					relogined = 1
		 			weirdvar = 5
		 	else:
		 		print "That didn't help, skipping.\n"
		 		weirdvar = 0

	print "Listing complete"
	previd = [0]
	re.purge()


#-----------Download file
def dl(url,filename,referer=None):
	dl(url,filename,referer,datetime.now())

def dl(url,filename,referer,imagedate):
	#circumventing some weirdass bug where we're downloading the same thing twice
	global prevname
	global fQuickCheck
	if quickCheck and fQuickCheck:
		return
	#url = re.sub(r'_p(\d*)\.',r'_big_p\1.',url)  #add big_ to manga #OBSOLETED
	try:
		print "Downloading:", url,
	except LookupError:
		print "(unknown console encoding)"
	if os.path.exists(filename) and os.path.isfile(filename):
		if quickCheck:
			fQuickCheck = True
			print "\tFile exists, quick check-skipping the rest.\n"
		else:
			print "\tFile exists!\n"
		return
	
	print " "
	print "Trying to request ",

	req = urllib2.Request(url)
	if referer != None: req.add_header("Referer", referer)

	while True:
	  try:
		res = br.open(req,timeout=10)
		break
	  except Exception, e:
		if str(e).startswith('HTTP Error 404'):
			url = re.sub(r'_big_p(\d*)\.',r'_p\1.',url)  #remove big_ from manga back >_>
			req = urllib2.Request(url)
			if referer != None:
				req.add_header("Referer", referer)
			print "\nURL change to",url
			while True:
			 try:
				res = br.open(req,timeout=10)
				break
	 		 except Exception, e:
				if str(e).startswith('HTTP Error 404'):
		 			print "Error 404 on fullsize, skipping the picture..."
		 			return
		 		else:
		 			print "(e)",str(e),
					print '.',
					time.sleep(5)
		else:
		 	print "(f)",str(e),
			print '.',
			time.sleep(5)

	dir = os.path.dirname(filename)
	if not os.path.exists(dir):
		try:
			print " Creating directory", dir,
		except UnicodeError:
			print "(not supported by console)",
		except LookupError:
			print "(unknown console encoding)",
		os.makedirs(dir)

	fretry = False
	fretrying = True
	save = open(filename, "w+b", 32768)
	while fretrying:
	 try:
		prev = 0
		if logFile == "":
			print '{0:10d} bytes'.format(prev),
		while 1:
			save.write(res.read(1024*256))
			curr = save.tell()
			if logFile == "":
				print '\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b',
				print '{0:10d} bytes'.format(curr),
				if curr == prev:
					fretrying = False
					break
			prev = curr
	 except IOError:
		if not fretry:
			fretry = True
			#save.seek(0,0)
			#save.truncate()
			save.flush()
			res = br.open(req,timeout=10)
			#print "and on...",
		else:
			print ".",
		time.sleep(5)
	save.close()
	if logFile != "":
		print '{0:10d} bytes'.format(curr),
	os.utime( filename, (calendar.timegm(imagedate.timetuple()), calendar.timegm(imagedate.timetuple())) )
	print " done"


#-------------produce an artist's okiniiri list.txt
def dlOkiniiriList(pager):
	curpage=1
	gotlist=''
	artistid=''
	print "Retrieving okiniiri list at:", pager,
	while True:
		req = urllib2.Request(pager+'&p=%d'%(curpage))
		while True:
		 try:
			res = br.open(req,timeout=10)
			break
	 	 except Exception, e:
			if str(e).startswith('HTTP Error 404'):
		 		print "Not found, skipping."
		 		return
			else:
				print '.',
				time.sleep(5)
		while True:
		 try:
			parseList = BeautifulSoup(res.read())
			break
		 except:
			time.sleep(5)
			print ',',
		
		if artistid=='':
			artistid = re.search( r'^http://i\d*\.pixiv\.net/img\d*/profile/([^/]*)/.*', parseList.find('div',{'class':'profile_area'}).find('img')['src'] ).group(1)
			print '\nUser='+artistid, 'Pages:',
		list_person = parseList.find_all('div',{'class':'usericon'})
		for au in list_person:
			z = re.search( r'^http://i\d*\.pixiv\.net/img\d*/profile/([^/]*)/(mobile/)?.*\..*', au.find('img')['src'] )
			if z: tehid = z.group(1)
			else:
				print "~", #extracting stacc name from artist's page
				q=urllib2.Request(url+au.find('a')['href'])
				try:
					a=br.open(q,timeout=10)
					p=BeautifulSoup(a.read())
					p=p.find('div',{'class':'extaraNavi'}).find('a',href=re.compile(r'.*net/stacc/.*'))['href']
					tehid=p.split('/')[-1]
					#print tehid,
				except:
					print "?", #failed to extract. bah, forget all this omfgletsfinditnomatterwhat shit, use full link
					tehid=url+'member_illust.php?id='+au.find('a')['href'].split('=')[1]
			
			gotlist+= tehid+'\x0A'
		
		if list_person:
			print curpage,
			curpage+=1
		else:
			filename = 'Okiniiri of %s.txt'%(artistid)
			print "\nSaving the list as",filename
			open(filename,'w+').writelines(gotlist.encode('utf8'))
			return

#-------------process list.txt
def processList(filename):
	def ___commonpart___(pager):
		if re.search('^[a-zA-Z0-9-_]*$',pager):
			pager = "http://pixiv.me/"+pager
		if not re.search(r'^(http://)*([w\.]*)pixiv\.(net|cc)',pager):
			print "Not a pixiv address! Skipping..."
		else:
			pager = re.sub(r'member\.php',r'member_illust.php',pager)
			pager = re.sub(r'([?&]page=\d*|[?&]p=\d*|[?&]num=\d*)','',pager)
			if re.search(r'/stacc/',pager):
				print "Converting a stacc address...",
				try:
					ww = br.open(pager)
					www = BeautifulSoup(ww.read())
					wwww = www.find('a',{'title':u'作品'})
					pager = wwww['href']
					print "success!"
				except Exception,e:
					print "(g)",str(e),": skipping..."
					return
			if pager == "http://www.pixiv.net/bookmark_new_illust.php":
				pager = "http://www.pixiv.net/bookmark_new_illust.php?mode=new"
			if re.search(r'bookmark\.php.*type=user',pager):
				dlOkiniiriList(pager)
			else:
				downloadAllImages(pager)
	
	if filename[:7]=='http://':
		print "Downloading from:", filename
	else: print "Processing list from:", filename
	if filename[:7]=='http://':
		___commonpart___(filename)
	elif os.path.exists(filename) and os.path.isfile(filename):
		reader = open(filename,'r')
		for line in reader:
			pager = line.replace(chr(10),"").replace(chr(13),"")
			if pager.startswith('#'):
				continue
			___commonpart___(pager)
	else:
		print "File not found."
		
#-------------Sanitize filename (windows, but / counts as \)
badchars= re.compile(r'['+chr(01)+'-'+chr(31)+']|^\.|\.$|^ | $|^$|\?|:|<|>|\||\*|\"')
badnames= re.compile(r'(aux|com[1-9]|con|lpt[1-9]|prn)(\.|$)')

def sanitizeFilename(s):
	name= badchars.sub('_', s)
	if badnames.match(name):
		name= '_'+name
	return name


#------------Main Block
def main():
	try:
		prepare()
		global logFile
		if logFile != "":
			print "Logging output to "+logFile
			sys.stdout = codecs.open(logFile,'a+',encoding="utf-8-sig")
		global version
		print "Pixiv Mass Downloader ver." + version
		if logFile != "":
			print "By Duc Digital; Nandaka; Kaens Bard"

		global username
		if username == None or username == "":
			if logFile != "":
				print "Cannot log in from stdin while file-logging, terminating"
			else:
				username = raw_input("Username = ")
		else:
			print "Login as: "+username
		global password
		if password == None or password == "":
			if logFile != "":
				print "Cannot log in from stdin while file-logging, terminating"
			else:
				password = raw_input("Password = ")

		global numberOfPage
		if numberOfPage != 0:
			print "Page processing limit = ", numberOfPage

		global overwrite
		global quickCheck
		if overwrite:
			if not quickCheck:
				print "Overwrite mode"
			else:
				print "Overwrite mode--overridden by Quick check mode"
				overwrite = False

		if len(sys.argv) == 0:
			print "\nUTC", str(datetime.utcnow()), "Empty command line! Nothing to process."
		else:
			if login(username, password) == 0:
				for arg in sys.argv[1:]:
					if arg=='-q':
						print "\nQuick check mode now disabled"
						quickCheck = False
					elif arg=='+q':
						print "\nQuick check mode now enabled, overriding overwrite mode"
						quickCheck = True
						overwrite = False
					elif arg[:7]=='http://':
						processList(arg)
					elif os.path.exists(arg) and os.path.isfile(arg):
						processList(arg)
					else:
					 if re.search('^[a-zA-Z0-9-_]*$',arg):
						print "Checking for a stacc address...",
						try:
						 #ww = br.open(url+'stacc/'+arg)
						 ww = br.open("http://pixiv.me/"+arg)
						 www = BeautifulSoup(ww.read())
						 #open(arg+'-dump','w+').writelines(www.encode('utf8'))
						 wwww = www.find("a",{'class':'tab-works'})
						 print 'success!'
						 processList(url+wwww['href'])
						except Exception,e:
						 if str(e).startswith('HTTP Error 404'):
						 	print 'failed, skipping...'
						 else:
						 	print "(h)",str(e)
				if not quickCheck and (logFile==""):
					print "UTC", str(datetime.utcnow()), "All done! Press Enter to exit."
					raw_input()
				else:
					print "UTC", str(datetime.utcnow()), "Quick check complete."
			else:
				print "UTC", str(datetime.utcnow()), "Failed to log in."

	except KeyboardInterrupt as ex:
		print 'CTRL+C, aborted'


#-------load config
def loadConfig():
	config = ConfigParser.RawConfigParser()
	try:
		config.read('pixivUtil.ini')
		print "Reading values for",
		global username
		print "username",
		username = config.get('Authentication','username')
		global password
		print "password",
		password = config.get('Authentication','password')
		global proxyAddress
		global proxies
		print "proxy_address",
		proxyAddress = config.get('Settings','proxy_address')
		if proxyAddress:
		 try:
			proxies = {'http':proxyAddress}
		 except:
			print "(couldn't parse proxy line)",
			proxies = {}
		global useragent
		print "user_agent",
		useragent = config.get('Settings','user_agent')

		global numberOfPage
		print "number_of_page",
		numberOfPage = config.getint('Pixiv','number_of_page')
		global startPage
		print "start_page",
		startPage = config.getint('Pixiv','start_page')
		global quickCheck
		print "quickcheck",
		quickCheck = config.getboolean('Pixiv','quickcheck')
		global genericfilename_format
		print "genericfilename_format",
		genericfilename_format = config.get('Pixiv','genericfilename_format').decode('utf8')
		global member_illust_format
		print "member_illust_format",
		member_illust_format = config.get('Pixiv','member_illust_format').decode('utf8')
		global bookmark_new_illust_format
		print "bookmark_new_illust_format",
		bookmark_new_illust_format = config.get('Pixiv','bookmark_new_illust_format').decode('utf8')
		global response_format
		print "response_format",
		response_format = config.get('Pixiv','response_format').decode('utf8')
		global ranking_format
		print "ranking_format",
		ranking_format = config.get('Pixiv','ranking_format').decode('utf8')
		global description_format
		print "description_format",
		description_format = config.get('Pixiv','description_format').decode('utf8')
		global tokentag
		print "tokentag",
		tokentag = config.get('Pixiv','tokentag').decode('utf8')
		global blacklist
		print "blacklist",
		blacklist = config.get('Pixiv','blacklist')

		global logFile
		print "logfile",
		logFile = config.get('Settings','logfile').decode('utf8')
		global descript_ion
		print "descript.ion",
		descript_ion = config.getboolean('Settings','descript.ion')
		global description_file
		print "descript.ion_file",
		descript_ion = config.get('Settings','descript.ion_file').decode('utf8')
		global debugHttp
		print "debug_http",
		debugHttp = config.getboolean('Settings','debug_http')
		global useRobots
		print "use_robots."
		useRobots = config.getboolean('Settings','use_robots')

	except ConfigParser.NoOptionError:
		print "Required option not found in config, writing defaults..."
		writeConfig()
		exit()
	except ConfigParser.NoSectionError:
		print "Required section not found in config, writing defaults..."
		writeConfig()
		exit()

#-------write config
def writeConfig():
	print "Writing defaults is temporarily disabled, please add the missing option/section manually."
	return
	config = ConfigParser.RawConfigParser()
	config.add_section('Settings')
	config.add_section('Pixiv')
	config.add_section('Authentication')

	config.set('Authentication', 'username', username)
	config.set('Authentication', 'password', password)

	config.set('Pixiv', 'number_of_page', numberOfPage)
	config.set('Pixiv', 'start_page', startPage)
	config.set('Pixiv', 'quickcheck', quickCheck)
	config.set('Pixiv', 'blacklist', blacklist)
	config.set('Pixiv', 'genericfilename_format', genericfilename_format)
	config.set('Pixiv', 'member_illust_format', member_illust_format)
	config.set('Pixiv', 'bookmark_new_illust_format', bookmark_new_illust_format)
	config.set('Pixiv', 'response_format', response_format)
	config.set('Pixiv', 'ranking_format', ranking_format)
	config.set('Pixiv', 'descript.ion_format',description_format)
	config.set('Pixiv', 'tokentag',tokentag)

	config.set('Settings', 'proxy_address', proxyAddress)
	config.set('Settings', 'user_agent', useragent)
	config.set('Settings', 'debug_http', debugHttp)
	config.set('Settings', 'use_robots', useRobots)
	config.set('Settings', 'logfile', logFile)
	config.set('Settings','descript.ion',descript_ion)
	config.set('Settings','descript.ion_file',description_file)
	
	with open('pixivUtil.ini', 'wb') as configfile: #utf-8 dammit
		config.write(configfile)
	print "Configuration file saved."

#-------construct the filename
def makeFilename(pageline, member_id, artist_id, artist, title, image_id, num, imgdate, addtokentag):
	global tokentag
	image_id = str(image_id)
	if (tokentag != "") and addtokentag:
		image_id = image_id+r"["+tokentag+r"]"
	inurl_id = ' '
	q = urllib2.unquote(pageline).decode('utf8').split('/')[-1]
	#specific format checks
	z = re.search(r'member_illust\.php\?id=(\d*)',q)
	if z:
		inurl_id = z.group(1)
		nameformat = member_illust_format
	else:
		z = re.search(r'bookmark_new_illust\.php',q)
		if z:
			nameformat = bookmark_new_illust_format
		else:
			z = re.search(r'response\.php\?illust_id=(\d*)',q)
			if z:
				inurl_id = z.group(1)
				nameformat = response_format
			else:
				z = re.search(r'ranking.*(mode=([a-z0-9]*)|rookie)',q)
				if z:
					inurl_id = z.group(1)
					nameformat = ranking_format
				else:
					nameformat = genericfilename_format
	nameformat = nameformat.replace('%pageline%',q.replace(u'?',u'？'))\
	.replace('%artist%',artist.replace(u'\\',u'_').replace(u'/',u'_'))\
	.replace('%title%',title.replace(u'\\',u'_').replace(u'/',u'_'))\
	.replace('%image_id%',image_id).replace('_big','')\
	.replace('%member_id%',str(member_id))\
	.replace('%artist_id%',artist_id)\
	.replace('%inurl_id%',inurl_id)\
	.replace('%today%',str(date.today()))\
	.replace('%date%',imgdate.replace(u'\\',u'-')).replace(u'//',u'-')\
	.replace('%num%','{0:03d}'.format(num)) #leaving at 3 because sorting on it is only important for ranking, and that's 500 max
	return nameformat


#-------Construct the line of descript.ion
def makeDescription(file, title, tags, remarks):
	_file = file
	if _file.find(' ')>-1:
		_file = '"'+_file+'"'
	_file+=' '
	return _file + description_format\
	.replace(u'%title%',title)\
	.replace(u'%tags%',tags)\
	.replace(u'%remarks%',remarks)


#-------Configure browser object
def configBrowser():
	global br
	global pr
	global proxies
	if proxies:
		pr = ProxyHandler(proxies)
	else:
		pr = ProxyHandler()
	br.set_handle_equiv(True)
	#br.set_handle_gzip(True)
	br.set_handle_redirect(True)
	br.set_handle_referer(True)
	global useRobots
	br.set_handle_robots(useRobots)
	global debugHttp
	br.set_debug_http(debugHttp)
	br.visit_response
	global useragent
	br.addheaders = [('User-agent', useragent)]


#-------Login to pixiv
def login(username, password):
	print "Login at %s\t\t" % (url),
	req = urllib2.Request(url+'login.php')
	while True:
	 try:
		response = br.open(req,timeout=10)
		break
	 except Exception,e:
	 	 print "(i)",str(e),':: sleeping for 5 sec'
	 	 time.sleep(5)
	rd = response.read()
	global debugHttp
	if debugHttp:
		print rd

	try:
		form = br.select_form(nr=1)
		br['pixiv_id'] = username
		br['pass'] = password
		response = br.submit()  # LOGIN
	except Exception,e:
		print "(j)",str(e),"(assuming relogin)"
		#print "(login form not found, assuming relogin)",
		lolfile=open('loginness.log','wb+')
		lolfile.write(rd)
		lolfile.close()

	if response.geturl() == 'http://www.pixiv.net/mypage.php':
		print "DONE!"
		return 0
	else :
		print 'Wrong username or password'
		lolfile=open('loginness.log','wb+')
		lolfile.write(response.read())
		lolfile.close()
		return 1

def prepare():
	loadConfig()
	configBrowser()

def printConfig():
	print "Username  :",username
	print "Password  :",password
	print "Proxy Addr:",proxyAddress

if __name__ == "__main__":
	main()