# refs: System.Web.Extensions
import System.Text
import System.Text.RegularExpressions
import System.Web.Script.Serialization
import AlbumArtDownloader.Scripts
import util
//Inheritors should override the Suffix property to return a valid amazon suffix (like com, co.uk, de, etc...).
abstract class Amazon(AlbumArtDownloader.Scripts.IScript):
	// Shared implementation for the per-country Amazon scripts: searches the Amazon
	// site selected by Suffix and reports both the official product images and the
	// customer-uploaded gallery images of the matching products.

	// Each customer gallery costs one extra page request, so only the first few
	// search results are queried to avoid hammering the server.
	private static final MaxCustomerGalleryPages = 5

	// Image responses of this many bytes or fewer are treated as "no image".
	// NOTE(review): presumably the size of Amazon's 1x1 placeholder GIF - confirm.
	private static final PlaceholderImageLength = 43

	virtual IncludeCustomerImages as bool:
		get: return true // To avoid including customer images in the results, replace true with false here

	virtual IncludeOfficalImages as bool:
		get: return true // To avoid including official images in the results, replace true with false here

	virtual Name as string:
		get: return "Amazon (.${Suffix})"

	Version as string:
		get: return "0.13s"

	Author as string:
		get: return "Alex Vallat, ZOOT"

	// Domain suffix of the Amazon site to query (com, co.uk, de, ...).
	abstract protected Suffix as string:
		get: pass

	// Numeric code embedded in official image URLs, after the product id.
	virtual protected CountryCode as string:
		get: return "01"

	virtual protected SearchIndex as string: //Deprecated, ignored.
		get: return ""

	// Builds the search results URL for the given (already cleaned) artist and album.
	virtual protected def GetUrl(artist as string, album as string) as string:
		return "http://www.amazon.${Suffix}/gp/search?search-alias=popular&field-artist=${EncodeUrl(artist)}&field-title=${EncodeUrl(album)}&sort=relevancerank"

	// Runs the search and adds every image found to results.
	//   artist, album - search terms; punctuation is stripped before querying.
	//   results       - receives the estimated count and the found images.
	def Search(artist as string, album as string, results as IScriptResults):
		artist = StripCharacters("&.'\";:?!", artist)
		album = StripCharacters("&.'\";:?!", album)

		searchUrl = GetUrl(artist, album)
		resultsPage = GetPage(GetPageStream(searchUrl, null, true))

		// The named groups are the ones read by the helpers below: "title" from the
		// title attribute, "url" and "id" from the href, "artist" from the text that
		// follows the tag.
		// NOTE(review): the opening "<a" and the end of this pattern are a best
		// reconstruction (an optional trailing group whose content was not
		// recoverable is omitted) - verify against a live results page.
		resultsRegex = Regex("<a[^>]*title\\s*=\\s*\"(?<title>[^\"]+)\"[^>]*href\\s*=\\s*\"(?<url>[^\"]+?/dp/(?<id>[^/]+)/)[^>]+>(?<artist>[^<]+)", RegexOptions.Singleline | RegexOptions.IgnoreCase)
		resultsMatches = resultsRegex.Matches(resultsPage)
		results.EstimatedCount = resultsMatches.Count

		// Official images are added first, customer images afterwards.
		if IncludeOfficalImages:
			AddOfficialImages(resultsMatches, results)
		if IncludeCustomerImages:
			AddCustomerImages(resultsMatches, results)

	// Adds the official product image of every search result.
	private def AddOfficialImages(resultsMatches as MatchCollection, results as IScriptResults):
		for resultsMatch as Match in resultsMatches:
			id = resultsMatch.Groups["id"].Value
			productUrl = resultsMatch.Groups["url"].Value
			title = System.Web.HttpUtility.HtmlDecode(resultsMatch.Groups["title"].Value)
			resultArtist = System.Web.HttpUtility.HtmlDecode(resultsMatch.Groups["artist"].Value)

			// imageBase is also the callback parameter: RetrieveFullSizeImage appends
			// a size code to it to fetch the full size image.
			imageBase = "http://ecx.images-amazon.com/images/P/${id}.${CountryCode}."
			thumbnail = TryGetImageStream(imageBase + "_THUMB_")

			results.Add(thumbnail, "${resultArtist} - ${title}", productUrl, -1, -1, imageBase, CoverType.Front)

	// Adds the customer gallery images of the first MaxCustomerGalleryPages results.
	private def AddCustomerImages(resultsMatches as MatchCollection, results as IScriptResults):
		json = JavaScriptSerializer()
		// Built once here rather than once per result / per image.
		jsonRegex = Regex('var state = (?<json>{[^;]*});', RegexOptions.Multiline)
		jpgSuffixRegex = Regex("\\.jpg$")

		count = 0
		for resultsMatch as Match in resultsMatches:
			// We hit a page for each result. Searches on Amazon should generally return the
			// item that was searched for quickly if it's going to be found at all, so don't
			// hammer the server.
			count++
			if count > MaxCustomerGalleryPages:
				break

			id = resultsMatch.Groups["id"].Value
			title = System.Web.HttpUtility.HtmlDecode(resultsMatch.Groups["title"].Value)
			resultArtist = System.Web.HttpUtility.HtmlDecode(resultsMatch.Groups["artist"].Value)

			imagesUrl = "http://www.amazon.${Suffix}/gp/customer-media/product-gallery/${id}"
			imagesPage = GetPage(GetPageStream(imagesUrl, null, true))

			for jsonDataMatch as Match in jsonRegex.Matches(imagesPage):
				jsonData = jsonDataMatch.Groups["json"].Value
				// amazon.co.jp uses double-width backslashes (U+FF3C) when escaping JS strings. No, really.
				jsonData = jsonData.Replace("＼", "\\")

				result = json.Deserialize[of ImageInfo](jsonData)
				if result != null and result.imageList != null:
					for image as ImageInfo.Image in result.imageList:
						// Thumbnails are requested by inserting a size code before the extension.
						thumbnailUrl = jpgSuffixRegex.Replace(image.url, "._SX120_.jpg")
						results.Add(thumbnailUrl, "${resultArtist} - ${title}", imagesUrl + "?currentImageID=${image.id}", image.width, image.height, image.url, CoverType.Front)

	// Fetches the full size image for a result added by Search.
	//   imageBase - the callback parameter passed to results.Add: either a complete
	//               customer image URL (ending in .jpg) or an official image base URL.
	// Returns the image stream, or null if no image could be retrieved.
	def RetrieveFullSizeImage(imageBase):
		if imageBase.EndsWith(".jpg"): // Customer images never have larger sizes (and must end in .jpg)
			return TryGetImageStream(imageBase)

		imageStream = TryGetImageStream(imageBase + "_SCRM_")
		if imageStream != null:
			return imageStream

		//Fall back on Large size
		return TryGetImageStream(imageBase + "_SCL_")

	// Requests url and returns the response stream, or null when the request fails
	// with a WebException or the response is no longer than PlaceholderImageLength.
	def TryGetImageStream(url):
		request as System.Net.HttpWebRequest = System.Net.HttpWebRequest.Create(url)
		try:
			response = request.GetResponse()
			if response.ContentLength > PlaceholderImageLength:
				return response.GetResponseStream()

			// Too small to be a real image: release the connection before giving up.
			response.Close()
			return null
		except e as System.Net.WebException:
			return null
// Deserialization target for the "var state = {...}" JSON object found on a
// customer image gallery page. Field names must match the JSON property names.
class ImageInfo:
	// A single gallery image: its identifier, address and pixel dimensions.
	class Image:
		public url as string
		public id as string
		public width as int
		public height as int

	// Address of the gallery page itself.
	public pageUrl as string
	// The gallery's images; checked for null by the caller before iterating.
	public imageList as List[Image]