dato198613
Super Member

   
ჯგუფი: Registered
წერილები: 226
წევრი No.: 41033
რეგისტრ.: 2-September 07
|
#35056539 · 5 Feb 2013, 14:46 · · პროფილი · პირადი მიმოწერა · ჩატი
არავინ იცით? C:\Users\User\Desktop\extractionGoogleArtProject\extractionGoogleArtProject>extractiongoogleartproject.py telechargerOeuvre('http://www.googleartproject.com/collection/musee-dorsay-paris/artwork/the-saint-lazare-station-claude-monet/508102/', 'Monet_-_Gare_Saint-Lazare.jpg', 3) File "C:\Users\User\Desktop\extractionGoogleArtProject\extractionGoogleArtProject\extractionGoogleArtProject.py", line 32 SyntaxError: Non-ASCII character '\xe9' in file C:\Users\User\Desktop\extractionGoogleArtProject\extractionGoogleArtProject\extractionGoogleArtProject.py on line 32, but no encoding declared; see http://www.python.org/peps/pep-0263.html for details ამ შეცდომას მიწერს * * * CODE | import PyV8
import base64
import json
import os
import re
import subprocess
import time
import unicodedata
import urllib2
from xml.dom import minidom
### configuration
cheminDossierFragments = "fragments"
cheminDossierImages = "images"
cheminDossierInfos = "infos"
cheminDossierImageMagick = "ImageMagick"
# JavaScript file evaluated into the PyV8 context at the end of this section.
cheminJavascript = "core.js"
refererGoogleArtProject = "http://www.googleartproject.com/"
dureeSleep = 0.5  # seconds slept before every HTTP request
separateur = "\t"


### content of a web page
def getContenuUrl(url, referer=""):
    """Fetch ``url`` and return the raw response body.

    Sleeps ``dureeSleep`` seconds before the request, adds a ``Referer``
    header when ``referer`` is non-empty, and prints the URL in parentheses.
    Errors raised by ``urllib2.urlopen`` propagate to the caller.
    """
    time.sleep(dureeSleep)
    requete = urllib2.Request(url)
    if referer:
        requete.add_header("Referer", referer)
    print("(%s)" % url)
    return urllib2.urlopen(requete).read()


### URL of an image fragment, decryption of an image fragment
context = PyV8.JSContext()
context.enter()
# Read the script as bytes and decode it explicitly: handing PyV8 a byte
# string that contains non-ASCII bytes (0xe9, i.e. an ISO-8859-1 "e acute")
# triggers an implicit ASCII decode and a UnicodeDecodeError.
with open(cheminJavascript, "rb") as fichierJavascript:
    sourceJavascript = fichierJavascript.read().decode("iso-8859-1")
context.eval(sourceJavascript)
def getUrlFragment(urlImage, x, y, zoom, timestamp):
    """Return the result of the JavaScript ``getUrlFragment`` call.

    The call is evaluated in the module-level PyV8 ``context``; the
    JavaScript function is presumably defined by the script loaded into
    that context (not visible here). All five arguments are passed as
    quoted JavaScript string literals, exactly as formatted below.
    """
    appelJavascript = "getUrlFragment('%s', '%i', '%i', '%i', '%i')" % (
        urlImage, x, y, zoom, timestamp)
    return context.eval(appelJavascript)
def decrypterFragment(contenuFragment):
    """Decrypt a downloaded fragment and return its decoded bytes.

    The fragment is turned into a list of byte values (0-255), passed as an
    array literal to the JavaScript ``decrypterFragment`` function through
    the module-level PyV8 ``context``, and the base64 text it returns is
    decoded.
    """
    valeursOctets = [ord(caractere) & 0xFF for caractere in contenuFragment]
    # str() of a list of ints ("[1, 2, 3]") is also a valid JS array literal.
    base64Fragment = context.eval("decrypterFragment(%s)" % str(valeursOctets))
    return base64.b64decode(base64Fragment)
### information about a painting (painter, title, date...)
def _premiereFacette(jsonTableau, nomFacette):
    """Return the first value of facet ``nomFacette``, or "" when absent."""
    try:
        return jsonTableau["facets"][nomFacette][0]
    except (KeyError, IndexError, TypeError):
        return ""


def getInfosTableau(urlPageTableau):
    """Download a painting page and return a dict describing the painting.

    The page must contain ``var CURRENT_ARTWORK = new ap.Artwork(<json>);``;
    an ``IndexError`` is raised when it does not. Returned keys:
    "urlImage", "peintre", "titre", "date", "titre original",
    "autre titre", "mouvements" and "techniques". The optional fields fall
    back to "" when they are missing.
    """
    regexJsonTableau = re.compile(r"""var CURRENT_ARTWORK = new ap.Artwork\((.+)\);""")
    contenuPageTableau = getContenuUrl(urlPageTableau, refererGoogleArtProject)
    jsonTableau = json.loads(regexJsonTableau.findall(contenuPageTableau)[0])

    urlImage = str(jsonTableau["aggregation_image_url"])
    if not urlImage.startswith("http:"):
        # Any URL not already starting with "http:" gets the scheme prefixed.
        urlImage = "http:" + urlImage

    infosTableau = {
        "urlImage": urlImage,
        "peintre": jsonTableau["artist_display_name"],
        "titre": jsonTableau["title"],
    }
    try:
        infosTableau["date"] = str(jsonTableau["pretty_display_date"])
    except (KeyError, UnicodeError):
        # Missing date, or a date str() cannot encode: keep the empty default.
        infosTableau["date"] = ""
    infosTableau["titre original"] = _premiereFacette(jsonTableau, "Original Title")
    infosTableau["autre titre"] = _premiereFacette(jsonTableau, "Non-English title")
    infosTableau["mouvements"] = _premiereFacette(jsonTableau, "Style")
    infosTableau["techniques"] = _premiereFacette(jsonTableau, "Medium")
    return infosTableau
### downloading the image fragments
def getInfosFragments(urlImage, zoom):
    """Read the tiling description of an image.

    Parses the XML document served at ``urlImage + "=g"`` and returns
    ``(zoom, xMax, yMax, largeurFragment, hauteurFragment)``, where ``zoom``
    is capped at ``full_pyramid_depth - 1`` and ``xMax`` / ``yMax`` are the
    tile counts of the ``pyramid_level`` element at that index.
    """
    docXml = minidom.parse(urllib2.urlopen(urlImage + "=g"))

    def attributEntier(noeud, nomAttribut):
        return int(noeud.attributes[nomAttribut].value)

    largeurFragment = attributEntier(docXml.firstChild, "tile_width")
    hauteurFragment = attributEntier(docXml.firstChild, "tile_height")
    zoomMax = attributEntier(docXml.firstChild, "full_pyramid_depth") - 1
    zoom = min(zoom, zoomMax)

    niveau = docXml.getElementsByTagName("pyramid_level")[zoom]
    xMax = attributEntier(niveau, "num_tiles_x")
    yMax = attributEntier(niveau, "num_tiles_y")
    return zoom, xMax, yMax, largeurFragment, hauteurFragment
def telechargerFragment(urlImage, cheminFragment, x, y, zoom):
    """Download, decrypt and save one image fragment.

    The fragment URL is built from the tile coordinates, the zoom level and
    the current Unix time; the decrypted bytes are written to
    ``cheminFragment``.
    """
    timestamp = int(time.time())
    urlFragment = getUrlFragment(urlImage, x, y, zoom, timestamp)
    contenuFragment = getContenuUrl(urlFragment, refererGoogleArtProject)
    contenuFragment = decrypterFragment(contenuFragment)
    # "with" closes the file even when the write fails.
    with open(cheminFragment, "wb") as fichierFragment:
        fichierFragment.write(contenuFragment)
def telechargerTousFragments(urlImage, xMax, yMax, zoom):
    """Download every fragment of the image, row by row.

    Fragments are saved in ``cheminDossierFragments`` as
    ``fragment_NNN.jpg``, numbered from 1 in row-major order.
    """
    for ligne in range(yMax):
        for colonne in range(xMax):
            # Same 1-based running counter as a manual "i = i + 1".
            numero = ligne * xMax + colonne + 1
            nomFragment = "fragment_%s.jpg" % format(numero, "03d")
            telechargerFragment(
                urlImage,
                os.path.join(cheminDossierFragments, nomFragment),
                colonne,
                ligne,
                zoom,
            )
### rebuilding the image from its fragments
def reconstituerImage(nomFichierImage, xMax, yMax, largeurFragment, hauteurFragment):
    """Assemble the downloaded fragments into one image, then trim it.

    Runs ImageMagick ``montage`` over every ``fragment_[0-9]*.jpg`` file in
    ``cheminDossierFragments`` to build
    ``cheminDossierImages/nomFichierImage`` as an ``xMax`` x ``yMax`` grid
    of ``largeurFragment`` x ``hauteurFragment`` tiles, then runs
    ``mogrify`` on the result. Exit codes are not checked.
    """
    cheminImage = os.path.join(cheminDossierImages, nomFichierImage)
    # The executable names were masked as "montage.###" / "mogrify.###" in
    # the pasted listing; the Windows-only creationflags below imply ".exe".
    commandeAssembler = " ".join([
        os.path.join(cheminDossierImageMagick, "montage.exe"),
        os.path.join(cheminDossierFragments, "fragment_[0-9]*.jpg"),
        "-quality 100",
        "-tile %sx%s" % (xMax, yMax),
        "-geometry %sx%s" % (largeurFragment, hauteurFragment),
        cheminImage,
    ])
    # NOTE(review): ImageMagick applies settings to the operators that
    # follow them, so "-fuzz 10%" placed after "-trim" may have no effect.
    # Order kept as in the original; confirm before swapping.
    commandeRogner = " ".join([
        os.path.join(cheminDossierImageMagick, "mogrify.exe"),
        "-quality 100",
        "-trim",
        "-fuzz 10%",
        cheminImage,
    ])
    for commande in (commandeAssembler, commandeRogner):
        # 0x08000000 is the Windows CREATE_NO_WINDOW process-creation flag.
        processus = subprocess.Popen(commande, shell=True, creationflags=0x08000000)
        processus.communicate()
### list of a painter's paintings
def getUrlPagesTableaux(idPeintre):
    """Return the absolute page URL of each painting of artist ``idPeintre``.

    Queries the artwork JSON API (limit 500, offset 0) and prefixes each
    entry's ``absolute_url`` with the site root.
    """
    urlJsonTableaux = "http://www.googleartproject.com/api/int/gap2/artwork/?canonical_artist=%i&limit=500&offset=0&format=json" % idPeintre
    reponse = json.loads(getContenuUrl(urlJsonTableaux, refererGoogleArtProject))
    return [
        "http://www.googleartproject.com" + str(tableau["absolute_url"])
        for tableau in reponse["objects"]
    ]
### normalising a character string
def normaliserChaine(chaine):
    """Return ``chaine`` as an ASCII byte string with accents stripped.

    Byte strings are decoded as ASCII, falling back to ISO-8859-1 when they
    contain non-ASCII bytes; other non-text values are converted to text.
    The text is then NFKD-decomposed and every character that has no ASCII
    form is dropped.
    """
    typeTexte = type(u"")  # the text type: ``unicode`` on Python 2
    if isinstance(chaine, bytes):
        try:
            chaine = chaine.decode("ascii")
        except UnicodeDecodeError:
            # ISO-8859-1 maps every byte, so this fallback cannot fail.
            chaine = chaine.decode("iso-8859-1")
    elif not isinstance(chaine, typeTexte):
        chaine = typeTexte(chaine)
    # NFKD splits an accented letter into base letter + combining mark;
    # the ASCII encoder below then discards the mark.
    chaine = unicodedata.normalize("NFKD", chaine)
    return chaine.encode("ASCII", "ignore")
def normaliserNomFichier(chaine):
    """Return ``chaine`` made safe for use as a file name.

    The string is first normalised with ``normaliserChaine``; every
    character other than a digit, an ASCII letter, "." or "-" is then
    replaced by "_".
    """
    chaineNormalisee = normaliserChaine(chaine)
    return re.sub("[^0-9a-zA-Z\.\-]", "_", chaineNormalisee)
### downloading images and information
def nettoyerDossier(cheminDossier):
    """Delete every regular file directly inside ``cheminDossier``.

    Sub-directories and their contents are left untouched.
    """
    chemins = [
        os.path.join(cheminDossier, nomFichier)
        for nomFichier in os.listdir(cheminDossier)
    ]
    for cheminFichier in chemins:
        if not os.path.isfile(cheminFichier):
            continue
        os.remove(cheminFichier)
def telechargerTableau(urlImage, nomFichierImage, zoom):
    """Download one image at the requested zoom and save it.

    Looks up the tiling information, empties the fragments folder,
    downloads every fragment, then assembles them into ``nomFichierImage``.
    """
    (zoomEffectif, nbColonnes, nbLignes,
     largeurFragment, hauteurFragment) = getInfosFragments(urlImage, zoom)
    # Start from an empty fragments folder.
    nettoyerDossier(cheminDossierFragments)
    telechargerTousFragments(urlImage, nbColonnes, nbLignes, zoomEffectif)
    reconstituerImage(nomFichierImage, nbColonnes, nbLignes,
                      largeurFragment, hauteurFragment)
def telechargerTableauxPeintre(nomPeintre, idPeintre, zoom):
    """Download every painting of a painter and write an information file.

    Images are saved as ``<normalised painter name>-NNN.jpg``. A file named
    ``<normalised painter name>.csv`` is written in ``cheminDossierInfos``
    with one header row and one row per painting, fields joined by
    ``separateur`` and normalised with ``normaliserChaine``.
    """
    listeChamps = ["image", "peintre", "titre", "date", "titre original",
                   "autre titre", "mouvements", "techniques"]
    prefixeFichier = normaliserNomFichier(nomPeintre)
    cheminInfos = os.path.join(cheminDossierInfos, prefixeFichier + ".csv")
    # "with" closes the file, keeping the rows already written, even when a
    # download fails part-way through.
    with open(cheminInfos, "w") as fichierInfos:
        fichierInfos.write(separateur.join(listeChamps) + "\n")
        listeUrlPagesTableaux = getUrlPagesTableaux(idPeintre)
        nombreTableaux = len(listeUrlPagesTableaux)
        print("### %s : %i tableaux" % (nomPeintre, nombreTableaux))
        for i, urlPageTableau in enumerate(listeUrlPagesTableaux, 1):
            print("# %s, tableau %i/%i : %s" % (nomPeintre, i, nombreTableaux, urlPageTableau))
            infosTableau = getInfosTableau(urlPageTableau)
            nomFichierImage = prefixeFichier + "-" + format(i, "03d") + ".jpg"
            telechargerTableau(infosTableau["urlImage"], nomFichierImage, zoom)
            infosTableau["image"] = nomFichierImage
            ligne = [normaliserChaine(infosTableau[champ]) for champ in listeChamps]
            fichierInfos.write(separateur.join(ligne) + "\n")
### main functions
def telechargerOeuvre(urlOeuvre, nomFichierImage, zoom):
    """Download the image of a single artwork page.

    The image URL is taken from the first ``data-image-url="..."``
    attribute found in the page; the image is saved under the normalised
    form of ``nomFichierImage``.
    """
    motifUrlImage = re.compile("""data-image-url=\"([^\"]+)\"""")
    pageOeuvre = getContenuUrl(urlOeuvre, refererGoogleArtProject)
    urlImage = motifUrlImage.findall(pageOeuvre)[0]
    nomNormalise = normaliserNomFichier(nomFichierImage)
    telechargerTableau(urlImage, nomNormalise, zoom)
def telechargerArtiste(urlArtiste, zoom):
    """Download every painting of the artist shown on ``urlArtiste``.

    The artist name and numeric id are read from the first
    ``data-artist-name="..."`` and ``data-artist-id="..."`` attributes of
    the page, then passed to ``telechargerTableauxPeintre``.
    """
    contenuPageArtiste = getContenuUrl(urlArtiste, refererGoogleArtProject)
    regexNomArtiste = re.compile("""data-artist-name=\"([^\"]+)\"""")
    regexIdArtiste = re.compile("""data-artist-id=\"([^\"]+)\"""")
    nomArtiste = regexNomArtiste.findall(contenuPageArtiste)[0]
    idArtiste = int(regexIdArtiste.findall(contenuPageArtiste)[0])
    telechargerTableauxPeintre(nomArtiste, idArtiste, zoom)
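### script execution
# NOTE: both calls below are commented out, and nothing in the script shown here reads command-line arguments, so running the file as-is downloads nothing. Uncomment one of the calls to start a download.
#telechargerOeuvre('http://www.googleartproject.com/collection/musee-dorsay-paris/artwork/the-saint-lazare-station-claude-monet/508102/', 'Monet_-_Gare_Saint-Lazare.jpg', 3)
#telechargerArtiste('http://www.googleartproject.com/artist/claude-monet/4127022/', 2)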
|
იუნიკოდზე აქვს პრობლემა მაგ. 32 ხაზზე ### URL d'un fragment d'image, décryptage d'un fragment d'image სხვაგანაც არის და ყველას რო ამოვიღებ და ისე ვუშვებ მერე მიწერს
CODE | C:\Users\User\Desktop\extractionGoogleArtProject\extractionGoogleArtProject>extractionGoogleArtProject.py telechargerOeuvre('http://www.googleartproject.com/collection/musee-dorsay-paris/artwork/the-saint-lazare-station-claude-monet/508102/', 'Monet_-_Gare_Saint-Lazare.jpg', 2) Traceback (most recent call last): File "C:\Users\User\Desktop\extractionGoogleArtProject\extractionGoogleArtProject\extractionGoogleArtProject.py", line 36, in <module> context.evаl(open(cheminjava[ertad]script, "r").read()) UnicodeDecodeError: 'ascii' codec can't decode byte 0xe9 in position 138117: ordinal not in range(128) |
ვინმე დამეხმარეთ რა.. * * * გავაკეთე ეგ პრობლემა. კიდე არის core.js და იქ იყო // URL d'un fragment d'image, décryptage d'un fragment d'image ეს ხაზი და კიდე იუნიკოდის პრობლემა ქონდა. ეხლა რო ვაწვები შეცდომას აღარ მიწერს მაგრამ არც სურათს არ იწერს...
|