42fea0f 6cbc4c0 b5c4208
1
2
3
4
# Fetch the latest pages-articles dump of kkwiki from Wikimedia.
wget https://dumps.wikimedia.org/kkwiki/latest/kkwiki-latest-pages-articles.xml.bz2

# Fetch the CC-100 "kk" text corpus and decompress it in place
# (unxz replaces kk.txt.xz with kk.txt).
wget http://data.statmt.org/cc-100/kk.txt.xz
unxz kk.txt.xz

# Run WikiExtractor directly on the compressed dump; results are written
# under ./extracted in JSON form (--json).
python3 -m wikiextractor.WikiExtractor kkwiki-latest-pages-articles.xml.bz2 --output extracted --json