വിക്കിപീഡിയ, ഒരു സ്വതന്ത്ര വിജ്ഞാനകോശം.
import urllib2
import re
def outputHeader(outFileHandle):
    """Emit the opening line of the generated dictionary literal.

    The text ``dict = {`` is echoed to stdout and written, followed by a
    newline, to ``outFileHandle``.

    Args:
        outFileHandle: Writable text file object receiving the generated code.

    NOTE(review): the emitted name ``dict`` shadows the built-in in whatever
    module the output becomes; it is kept because consumers of the generated
    file may rely on that name.
    NOTE(review): if the entries written after this header contain non-ASCII
    text and the output is imported by Python 2, the file would also need a
    PEP 263 coding line -- confirm with consumers before adding one.
    """
    header = 'dict = {'
    # print(<single expression>) and write() behave the same on Python 2 and
    # Python 3, unlike the ``print >> handle`` statement form.
    print(header)
    outFileHandle.write(header + '\n')
def _escapeForSingleQuoted(text):
    """Return ``text`` made safe for embedding in a single-quoted literal."""
    # Backslashes must be doubled first, otherwise the backslashes added for
    # the quotes would themselves be doubled.
    return text.replace('\\', '\\\\').replace('\'', '\\\'')


def appendToDictionary(outFileHandle, langCode, langName):
    """Emit one ``'code': (u'name'),`` entry of the generated dictionary.

    The entry is echoed to stdout and written, followed by a newline, to
    ``outFileHandle``. Backslashes and single quotes in either value are
    escaped so that a name such as ``O'zbek`` cannot terminate the literal
    early and corrupt the generated source.

    Args:
        outFileHandle: Writable text file object receiving the generated code.
        langCode: Language code, used as the dictionary key.
        langName: Language name, used as the (unicode literal) value.
    """
    entry = (' \'' + _escapeForSingleQuoted(langCode) +
             '\': (u\'' + _escapeForSingleQuoted(langName) + '\'),')
    # print(<single expression>) and write() behave the same on Python 2 and
    # Python 3, unlike the ``print >> handle`` statement form.
    print(entry)
    outFileHandle.write(entry + '\n')
def outputFooter(outFileHandle):
    """Emit the closing brace of the generated dictionary literal.

    The text `` }`` (space, closing brace) is echoed to stdout and written,
    followed by a newline, to ``outFileHandle``.

    Args:
        outFileHandle: Writable text file object receiving the generated code.
    """
    footer = ' }'
    # print(<single expression>) and write() behave the same on Python 2 and
    # Python 3, unlike the ``print >> handle`` statement form.
    print(footer)
    outFileHandle.write(footer + '\n')
def getWikiList(url='http://s23.org/wikistats/wikipedias_wiki.php'):
    """Download the raw wiki list page and return its body.

    Args:
        url: Address to fetch; defaults to the s23.org wikistats page. Any
            URL that ``urllib2`` can open is accepted (presumably including
            ``file://`` URLs for offline testing -- verify before relying
            on it).

    Returns:
        The response body exactly as returned by ``response.read()``.

    Raises:
        Whatever ``urllib2.urlopen`` raises when the request fails.
    """
    response = urllib2.urlopen(urllib2.Request(url))
    try:
        return response.read()
    finally:
        # Release the connection even when read() fails part-way.
        response.close()
def main():
    """Fetch the wiki list and generate ``./wikidict.py`` from it.

    The generated file holds a header line, one entry per regex match
    (language code mapped to language name), and a footer line. Each line is
    also echoed to stdout by the helper functions.
    """
    # Download before opening the output file: opening with 'w' truncates it,
    # so a failed download must not leave a header-only wikidict.py behind.
    data = getWikiList()

    # Matches, across four consecutive lines: a line ending in digits, a
    # "| [[w:<target>|<label>" line (group 1 = label, up to the first "]"),
    # one skipped line, and a "| [[:<code>:" line (group 2 = code).
    langInfoR = re.compile(r'\d+\n\|\s\[\[w\:[^\|]+\|([^\]]+).*\n.*\n\|\s\[\[\:([^\:]+)\:.*')

    # The context manager guarantees the file is flushed and closed, even if
    # writing an entry raises.
    with open('./wikidict.py', 'w') as outFileHandle:
        outputHeader(outFileHandle)
        for langName, langCode in langInfoR.findall(data):
            appendToDictionary(outFileHandle, langCode, langName)
        outputFooter(outFileHandle)
# Script entry point: run the generator only when executed directly.
if __name__ == "__main__":
    try:
        main()
    finally:
        # Always finish with an empty line on stdout, whether or not main()
        # raised. print("") emits the same single newline on Python 2 and 3.
        print("")