import re
import sys
import time
import math
import urllib2
import urlparse
import optparse
import hashlib
import socket
from string import *
from cgi import escape
from traceback import format_exc
from Queue import Queue, Empty as QueueEmpty
from BeautifulSoup import BeautifulSoup


# User-Agent header value sent with every request made by this module.
AGENT = " ".join(["deneme", "agent"])


def getir_butunLinkler(url):
    """Fetch *url* and print every link collected from it, numbered from 0.

    Each output line has the form ``<index>. <link>``.
    """
    fetcher = Getir(url)
    fetcher.getirgetir()
    index = 0
    # Getir supports iteration through its __getitem__ method.
    for link in fetcher:
        print("%d. %s" % (index, link))
        index += 1



"""
getir_butunLinkler("http://www.eciftim.com")
0. http://www.eciftim.com/
1. http://www.eciftim.com/kategori/
2. http://www.eciftim.com/xkategory/sonilanlar/
3. http://www.eciftim.com/login/signup/
4. http://www.eciftim.com/login/
5. http://www.eciftim.com/xkategory/xkategory/es/gunluk-esya/
6. http://www.eciftim.com/xkategory/xkategory/es/ev-esya/
7. http://www.eciftim.com/xkategory/xkategory/es/gunluk-seyler/
8. http://www.eciftim.com/xkategory/xkategory/es/elktronik-esya/
9. http://www.eciftim.com/xkategory/xkategory/es/pazarlama/
10. http://www.eciftim.com/xkategory/xkategory/es/kelepir/
11. http://www.eciftim.com
12. http://www.eciftim.com/ilan/7/ajax/
13. http://www.eciftim.com/ilan/6/ajax/
14. http://www.eciftim.com/index.html
15. http://www.eciftim.com/hakkimizda.html
16. http://www.eciftim.com/yardim.html
17. http://www.eciftim.com/guvenlik.html
18. http://www.eciftim.com/nasilcalisir.html
19. http://www.eciftim.com/kullanimkosulu.html
20. http://www.eciftim.com/sirketlericin.html
21. http://www.eciftim.com/gizlilik.html
"""

class Link(object):
    """A directed link from one URL to another, tagged with a link type.

    Instances are hashable and compare by value: two links are equal when
    their ``src``, ``dst`` and ``link_type`` are all equal, so they can be
    stored in sets or used as dictionary keys.
    """

    def __init__(self, src, dst, link_type):
        """Store the source URL, destination URL and the type of the link."""
        self.src = src
        self.dst = dst
        self.link_type = link_type

    def _key(self):
        """Return the tuple that defines this link's identity."""
        return (self.src, self.dst, self.link_type)

    def __hash__(self):
        return hash(self._key())

    def __eq__(self, other):
        # Comparison stays duck-typed (anything exposing the three attributes
        # can match), but unrelated objects now compare unequal instead of
        # raising AttributeError.
        try:
            other_key = (other.src, other.dst, other.link_type)
        except AttributeError:
            return NotImplemented
        return self._key() == other_key

    def __ne__(self, other):
        # Python 2 does not derive != from __eq__; without this method two
        # equal links would still report ``a != b`` as True.
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result

    def __str__(self):
        return self.src + " -> " + self.dst

    def __repr__(self):
        return "Link(%r, %r, %r)" % self._key()





        
          
class DataIstisna(Exception):
    """Error carrying the MIME type and URL of a rejected resource.

    Getir.getirgetir raises it when a response's MIME type is not
    ``text/html``.
    """

    def __init__(self, message, mimetype, url):
        """Record *message* as the exception text and keep the extra details."""
        super(DataIstisna, self).__init__(message)
        self.url = url
        self.mimetype = mimetype
        

class Getir(object):
    """Fetch one HTML page and collect the URLs of its ``<a href>`` links.

    Typical use is ``page = Getir(url); page.getirgetir()`` followed by
    iterating over ``page`` or calling ``out_links()``.

    Attributes set by the constructor:
      url       -- the address to fetch, exactly as given by the caller.
      out_urls  -- absolute link URLs collected so far, in discovery order.
      encoding  -- "" until a page has been decoded, then "utf-8".
      content   -- decoded page text ("" until a page has been fetched).
    """

    def __init__(self, url):
        self.url = url
        self.out_urls = []
        self.encoding = ""
        self.content = ""
        # NOTE: this changes the default timeout for every socket created
        # afterwards in the whole process, not just for this instance.
        socket.setdefaulttimeout(2)

    def __getitem__(self, x):
        """Return the collected URL at index *x*.

        This also makes the instance iterable and usable with ``in``.
        """
        return self.out_urls[x]

    def out_links(self):
        """Return the list of collected URLs (the live list, not a copy)."""
        return self.out_urls

    def _addHeaders(self, request):
        """Attach the module's User-Agent header to *request*."""
        request.add_header("User-Agent", AGENT)

    def _open(self):
        """Build the request and opener for ``self.url``.

        Returns a ``(request, opener)`` tuple, or None if constructing
        either of them raises IOError.
        """
        url = self.url
        try:
            request = urllib2.Request(url)
            handle = urllib2.build_opener()
        except IOError:
            return None
        return (request, handle)

    def getirgetir(self):
        """Download ``self.url`` and append its outgoing links to ``out_urls``.

        HTTP errors, URL errors, socket errors/timeouts and non-HTML
        responses are reported on stderr and add no links; they are not
        re-raised.  Returns None.
        """
        opened = self._open()
        if opened is None:
            # _open() signals failure with None; unpacking it directly would
            # raise TypeError.
            return
        request, handle = opened
        self._addHeaders(request)
        if not handle:
            return
        self._add_links(self._fetch_anchor_tags(request, handle))

    def _fetch_anchor_tags(self, request, handle):
        """Return the page's ``<a>`` tags, or [] if it cannot be read as HTML.

        On success ``self.content`` and ``self.encoding`` are updated.
        """
        data = None
        try:
            data = handle.open(request)
            mime_type = data.info().gettype()
            if mime_type != "text/html":
                raise DataIstisna("bir sey yok %s" % mime_type,
                                  mime_type, data.geturl())
            # The body is always decoded as UTF-8; undecodable bytes are
            # replaced rather than aborting the fetch.
            self.content = data.read().decode("utf-8", "replace")
            soup = BeautifulSoup(self.content)
            self.encoding = "utf-8"
            return soup('a')
        except urllib2.HTTPError as error:
            if error.code == 404:
                sys.stderr.write("ERROR: %s -> %s\n" % (error, error.url))
            else:
                sys.stderr.write("ERROR: %s\n" % error)
        except (urllib2.URLError, socket.error) as error:
            # socket.error covers socket.timeout, which can be raised while
            # reading the body because of the default timeout set in __init__.
            sys.stderr.write("ERROR: %s\n" % error)
        except DataIstisna as error:
            sys.stderr.write("atlaniyor %s, e %s\n"
                             % (error.url, error.mimetype))
        finally:
            # Always release the connection, on success and on failure.
            if data is not None:
                data.close()
        return []

    def _add_links(self, tags):
        """Resolve each tag's href against ``self.url`` and record new URLs."""
        # Set lookup replaces a linear scan of out_urls for every link.
        seen = set(self.out_urls)
        for tag in tags:
            href = tag.get("href")
            if href is None:
                continue
            # NOTE(review): escape() HTML-escapes the href, turning "&" in a
            # query string into "&amp;" -- that looks unintended for a URL;
            # kept as-is to preserve existing output, confirm before removing.
            joined = urlparse.urljoin(self.url.encode(self.encoding),
                                      escape(href))
            # Encode before the duplicate check so the comparison is made
            # between values of the same form as those stored in out_urls.
            link = joined.encode(self.encoding)
            if link not in seen:
                seen.add(link)
                self.out_urls.append(link)