{ "cells": [ { "cell_type": "code", "execution_count": 46, "metadata": {}, "outputs": [], "source": [ "import requests,json\n", "\n", "instance_url = \"https://mastodon.social\"\n", "account_id = \"70565\"\n", "\n", "def get_statuses(min_id=None):\n", " parameters=f\"exclude_replies=true&exclude_reblogs=true\"\n", " if min_id:\n", " parameters += f\"&max_id={min_id}\"\n", " r = requests.get(f\"{instance_url}/api/v1/accounts/{account_id}/statuses?{parameters}\")\n", " return r.json()\n", "\n", "def get_link_id(data, previous_id=None):\n", " last_id = None\n", " articles = []\n", " if previous_id:\n", " last_id = previous_id\n", " for status in data:\n", " if 'card' in status and status['card']:\n", " if 'davidrevoy.com/article' in status['card']['url']:\n", " articles.append({\n", " \"url\": status['card']['url'],\n", " \"post_id\": status['id']\n", " })\n", " if not last_id or int(status['id']) < int(last_id):\n", " last_id = status['id']\n", " return last_id, articles" ] }, { "cell_type": "code", "execution_count": 52, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "use last post as last_id: 110051048034247698\n", "use last post as last_id: 109988264693748577\n", "use last post as last_id: 109966435120459917\n", "use last post as last_id: 109959922952793083\n", "use last post as last_id: 109955465953200739\n", "use last post as last_id: 109695534954605490\n", "use last post as last_id: 109694221530555708\n", "use last post as last_id: 109688338516674856\n", "use last post as last_id: 109666798363908901\n", "use last post as last_id: 109665531064620308\n", "use last post as last_id: 109641890942010744\n", "use last post as last_id: 109631042524024320\n", "use last post as last_id: 109615602079447268\n", "use last post as last_id: 109614450485021041\n", "use last post as last_id: 109551450106488457\n", "use last post as last_id: 109542827843037100\n", "use last post as last_id: 109536809741433169\n", "use last post as last_id: 109536500125930936\n", "use last post as last_id: 109524201681268479\n", "use last post as last_id: 109520723992518406\n", "use last post as last_id: 109513539017562289\n", "use last post as last_id: 109513371386226518\n", "use last post as last_id: 109507291884526901\n", "use last post as last_id: 109490814384171609\n", "use last post as last_id: 109488699379649793\n", "use last post as last_id: 109484679389007007\n", "use last post as last_id: 109445207046419980\n", "use last post as last_id: 109440369270345684\n", "use last post as last_id: 109201442810650650\n", "use last post as last_id: 109190161791377998\n", "use last post as last_id: 109166756573980279\n", "use last post as last_id: 109081972644051491\n", "use last post as last_id: 109077204009467672\n", "use last post as last_id: 109048582803153598\n", "use last post as last_id: 109043138153208293\n", "use last post as last_id: 109032781765833033\n", "use last post as last_id: 109008345610185299\n", "use last post as last_id: 109003698871359004\n", "use last post as last_id: 108998163259962201\n", "use last post as last_id: 108985185088884403\n", "use last post as last_id: 108969347522624985\n", "use last post as last_id: 108958226139950477\n", "use last post as last_id: 108920414243331208\n", "use last post as last_id: 101036598942551636\n", "use last post as last_id: 101030247015278638\n", "use last post as last_id: 100992554524233256\n", "use last post as last_id: 100980257475992584\n", "use last post as last_id: 100973246732448977\n", "use last post as last_id: 100963307598008564\n", "use last post as last_id: 100951474269734454\n", "use last post as last_id: 100916035129648775\n", "use last post as last_id: 100895314498420896\n", "use last post as last_id: 100819919454922737\n", "use last post as last_id: 100764296811009571\n", "use last post as last_id: 100575663309044999\n", "use last post as last_id: 100475985747924949\n", "use last post as last_id: 100453363696674740\n", "use last post as last_id: 100401531824852155\n", "use last post as last_id: 100390614697305684\n", "use last post as last_id: 100344039026859611\n", "use last post as last_id: 100322101826835897\n", "use last post as last_id: 100318269114916110\n", "use last post as last_id: 100305154071895274\n", "use last post as last_id: 100299842863960004\n", "use last post as last_id: 100247086198133176\n", "use last post as last_id: 100242959987379087\n", "use last post as last_id: 99880368549679363\n", "use last post as last_id: 99840525144979208\n", "use last post as last_id: 99800818061861157\n", "use last post as last_id: 99777924258767158\n", "use last post as last_id: 99777898319483786\n", "use last post as last_id: 99773133822902374\n", "use last post as last_id: 99761473154883889\n", "use last post as last_id: 99729697964478172\n", "use last post as last_id: 99721715449436270\n", "use last post as last_id: 99711173491501545\n", "use last post as last_id: 99271529001662332\n", "use last post as last_id: 99185885905888666\n", "use last post as last_id: 99180277746017364\n", "use last post as last_id: 99168414988047796\n", "use last post as last_id: 99155446285473593\n", "use last post as last_id: 99133250875345253\n", "use last post as last_id: 99008971915406540\n", "use last post as last_id: 98998821235923664\n", "use last post as last_id: 98981371582685959\n", "use last post as last_id: 98953445225733822\n", "use last post as last_id: 98923541865872945\n", "use last post as last_id: 98920485474286544\n", "use last post as last_id: 98914615083406468\n", "use last post as last_id: 98907605930781507\n", "use last post as last_id: 98901526084298348\n", "use last post as last_id: 98900722413427501\n", "use last post as last_id: 98896089801884060\n", "use last post as last_id: 98894525948835232\n", "use last post as last_id: 98888949165706061\n", "use last post as last_id: 98884338411973212\n", "use last post as last_id: 98878792375279749\n", "use last post as last_id: 98873948516250917\n", "use last post as last_id: 98868556473516920\n", "use last post as last_id: 98861451232807850\n", "use last post as last_id: 98856193839261968\n", "use last post as last_id: 98854722593032120\n", "use last post as last_id: 98851853579756383\n", "use last post as last_id: 98849012198347130\n", "use last post as last_id: 98846146610745527\n", "use last post as last_id: 98837701032455262\n", "use last post as last_id: 98834695131580267\n", "use last post as last_id: 98827463928736107\n", "use last post as last_id: 98821436644968885\n", "use last post as last_id: 98816250220831962\n", "use last post as last_id: 21225477\n", "use last post as last_id: 21123907\n", "use last post as last_id: 21029277\n", "use last post as last_id: 21027601\n", "use last post as last_id: 20956649\n", "use last post as last_id: 20917634\n", "use last post as last_id: 15007569\n", "use last post as last_id: 14379550\n", "use last post as last_id: 13157378\n", "use last post as last_id: 12584968\n", "use last post as last_id: 12283172\n", "use last post as last_id: 12011489\n", "use last post as last_id: 10932310\n", "use last post as last_id: 10829692\n", "use last post as last_id: 10712954\n", "use last post as last_id: 9527874\n", "use last post as last_id: 8938875\n", "use last post as last_id: 8883894\n", "use last post as last_id: 7571615\n", "use last post as last_id: 6457436\n", "use last post as last_id: 6205107\n", "use last post as last_id: 6141289\n", "use last post as last_id: 5691347\n", "use last post as last_id: 4382080\n", "use last post as last_id: 4040461\n", "use last post as last_id: 4038767\n", "use last post as last_id: 3933417\n", "use last post as last_id: 3453492\n", "use last post as last_id: 3291580\n", "use last post as last_id: 2981819\n", "use last post as last_id: 2917758\n", "use last post as last_id: 2847528\n", "use last post as last_id: 2776862\n", "use last post as last_id: 2670935\n", "use last post as last_id: 2418941\n", "use last post as last_id: 2411607\n", "use last post as last_id: 2403458\n", "use last post as last_id: 2315009\n", "use last post as last_id: 2282636\n", "use last post as last_id: 2282308\n" ] }, { "ename": "IndexError", "evalue": "list index out of range", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mIndexError\u001b[0m Traceback (most recent call last)", "Cell \u001b[0;32mIn[52], line 13\u001b[0m\n\u001b[1;32m 11\u001b[0m data \u001b[39m=\u001b[39m get_statuses(last_id)\n\u001b[1;32m 12\u001b[0m \u001b[39mif\u001b[39;00m previous_id \u001b[39m==\u001b[39m last_id:\n\u001b[0;32m---> 13\u001b[0m last_id \u001b[39m=\u001b[39m data[:\u001b[39m1\u001b[39;49m][\u001b[39m0\u001b[39;49m][\u001b[39m'\u001b[39m\u001b[39mid\u001b[39m\u001b[39m'\u001b[39m]\n\u001b[1;32m 14\u001b[0m \u001b[39mprint\u001b[39m(\u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39muse last post as last_id: \u001b[39m\u001b[39m{\u001b[39;00mlast_id\u001b[39m}\u001b[39;00m\u001b[39m\"\u001b[39m)\n", "\u001b[0;31mIndexError\u001b[0m: list index out of range" ] } ], "source": [ "data = get_statuses()\n", "data[:1]\n", "last_id = None\n", "previous_id = None\n", "all_articles = []\n", "while previous_id != last_id or not last_id:\n", " previous_id = last_id\n", " last_id, articles = get_link_id(data,last_id)\n", " for art in articles:\n", " all_articles.append(art)\n", " data = get_statuses(last_id)\n", " if previous_id == last_id:\n", " if len(data) > 0:\n", " last_id = data[:1][0]['id']\n", " print(f\"use last post as last_id: {last_id}\")\n", "\n" ] }, { "cell_type": "code", "execution_count": 53, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[{'url': 'https://www.davidrevoy.com/article977/artificial-inteligence-why-i-ll-not-hashtag-my-art-humanart-humanmade-or-noai',\n", " 'post_id': '110628351223530613'},\n", " {'url': 'https://www.davidrevoy.com/article976/lenovo-yoga-370-on-gnu-linux-technical-companion-article',\n", " 'post_id': '110622204293604103'},\n", " {'url': 'https://www.davidrevoy.com/article975/derivation-the-norwegian-nynorsk-book-of-outland-forlag',\n", " 'post_id': '110588383308155987'},\n", " {'url': 'https://www.davidrevoy.com/article968/la-reunion-des-communs-2023',\n", " 'post_id': '110351145886402617'},\n", " {'url': 'https://www.davidrevoy.com/article964/goodbye-blue-bird',\n", " 'post_id': '110191966190761988'},\n", " {'url': 'https://www.davidrevoy.com/article962/future-episode-38-is-open-for-proofreading',\n", " 'post_id': '110141551153980723'},\n", " {'url': 'https://www.davidrevoy.com/article961/hollywood-ca-for-immediate-release',\n", " 'post_id': '110124003637753839'},\n", " {'url': 'https://www.davidrevoy.com/article959/fa-bd-comics-books-on-scamazon-don-t-buy-them',\n", " 'post_id': '110090792042968092'},\n", " {'url': 'https://www.davidrevoy.com/article957/episode-38-production-report-1',\n", " 'post_id': '110056539077200266'},\n", " {'url': 'https://www.davidrevoy.com/article953/krita-brushes-2023-01-bundle',\n", " 'post_id': '109712113428192581'},\n", " {'url': 'https://www.davidrevoy.com/article938/in-the-midst-of-experimentation',\n", " 'post_id': '109263833465390968'},\n", " {'url': 'https://www.davidrevoy.com/article937/introduce-the-dragon-to-the-outside-world',\n", " 'post_id': '109246886949978895'},\n", " {'url': 'https://www.davidrevoy.com/article933/signing-session-in-november-2022-brive-and-toulouse',\n", " 'post_id': '109211991112027303'},\n", " {'url': 'https://www.davidrevoy.com/article932/painting-study',\n", " 'post_id': '109201832906889617'},\n", " {'url': 'https://www.davidrevoy.com/article919/episode-37-production-report-part-2',\n", " 'post_id': '108692183291305828'},\n", " {'url': 'https://www.davidrevoy.com/article913/fedora-36-kde-spin-for-a-digital-painting-workstation-reasons-and-post-install-guide',\n", " 'post_id': '108494359441847436'},\n", " {'url': 'https://www.davidrevoy.com/article912/back-from-geekfaeries-2022-festival',\n", " 'post_id': '108447730812314768'},\n", " {'url': 'https://www.davidrevoy.com/article911/signing-session-agenda-2022',\n", " 'post_id': '108409409011470654'},\n", " {'url': 'https://www.davidrevoy.com/article206/narcissus-echo',\n", " 'post_id': '108243528774782948'},\n", " {'url': 'https://www.davidrevoy.com/article591/cat-bird-fenestar-abstract-avatar-generators',\n", " 'post_id': '108198235638979790'},\n", " {'url': 'https://www.davidrevoy.com/article906/ep37-production-report-style',\n", " 'post_id': '108120003351476017'},\n", " {'url': 'https://www.davidrevoy.com/article904/how-proprietary-social-medias-are-shaping-the-future-of-peppercarrot',\n", " 'post_id': '108046410941108708'},\n", " {'url': 'https://www.davidrevoy.com/article894/the-market',\n", " 'post_id': '107853362540471360'},\n", " {'url': 'https://www.davidrevoy.com/article887/happy-holiday-season',\n", " 'post_id': '107503186392703114'},\n", " {'url': 'https://www.davidrevoy.com/article878/looking-for-french-to-english-translators-contributors',\n", " 'post_id': '107163295486531802'},\n", " {'url': 'https://www.davidrevoy.com/article871/brushwork-study-202110',\n", " 'post_id': '107060497283722021'},\n", " {'url': 'https://www.davidrevoy.com/article864/dream-cats-nfts-don-t-buy-them',\n", " 'post_id': '106975350903601880'},\n", " {'url': 'https://www.davidrevoy.com/article854/krita-brushes-2021-bundle',\n", " 'post_id': '106704188355930728'},\n", " {'url': 'https://www.davidrevoy.com/article849/derivation-episode-3-motion-comic-by-morevna-backstage-video',\n", " 'post_id': '106353021151974499'},\n", " {'url': 'https://www.davidrevoy.com/article847/the-official-chat-room-moved',\n", " 'post_id': '106320143449547549'},\n", " {'url': 'https://www.davidrevoy.com/article845/',\n", " 'post_id': '106286951915064744'},\n", " {'url': 'https://www.davidrevoy.com/article841/',\n", " 'post_id': '106177678835210671'},\n", " {'url': 'https://www.davidrevoy.com/article840/derivation-treasure-hunt-by-filipe-vieira',\n", " 'post_id': '106167151942434702'},\n", " {'url': 'https://www.davidrevoy.com/article837/',\n", " 'post_id': '106126853384603616'},\n", " {'url': 'https://www.davidrevoy.com/article834/penguicon-2021',\n", " 'post_id': '106098137971648100'},\n", " {'url': 'https://www.davidrevoy.com/article828/rms-is-back-at-the-fsf',\n", " 'post_id': '105939126102824911'},\n", " {'url': 'https://www.davidrevoy.com/article826/derivation-peppercarrot-book1-in-german-by-ondrej-brinkel',\n", " 'post_id': '105917147239665870'},\n", " {'url': 'https://www.davidrevoy.com/article818/',\n", " 'post_id': '105810558002883470'},\n", " {'url': 'https://www.davidrevoy.com/article815',\n", " 'post_id': '105729720639057326'},\n", " {'url': 'https://www.davidrevoy.com/article808',\n", " 'post_id': '105357205031879647'},\n", " {'url': 'https://www.davidrevoy.com/article806/',\n", " 'post_id': '105174408935812814'},\n", " {'url': 'https://www.davidrevoy.com/article796/decentralizing-the-book-publishing-of-translations',\n", " 'post_id': '105074148381396831'},\n", " {'url': 'https://www.davidrevoy.com/article794/a-feedback-of-the-first-24h',\n", " 'post_id': '105045758099953136'},\n", " {'url': 'https://www.davidrevoy.com/article791/',\n", " 'post_id': '105004380609072303'},\n", " {'url': 'https://www.davidrevoy.com/article790/',\n", " 'post_id': '104959944667265844'},\n", " {'url': 'https://www.davidrevoy.com/article789/',\n", " 'post_id': '104954215891270232'},\n", " {'url': 'https://www.davidrevoy.com/article788/',\n", " 'post_id': '104925934320407419'},\n", " {'url': 'https://www.davidrevoy.com/article30/ergonomic-of-graphics-tablets',\n", " 'post_id': '104858039025447101'},\n", " {'url': 'https://www.davidrevoy.com/article783',\n", " 'post_id': '104643494972996780'},\n", " {'url': 'https://www.davidrevoy.com/article782',\n", " 'post_id': '104626760260616106'},\n", " {'url': 'https://www.davidrevoy.com/article779/production-report-making-episode-33',\n", " 'post_id': '104371835129095609'},\n", " {'url': 'https://www.davidrevoy.com/article777/libre-graphic-meeting-online-2020-livestream',\n", " 'post_id': '104231129521089298'},\n", " {'url': 'https://www.davidrevoy.com/article775/grown-up-saffron',\n", " 'post_id': '104145524844201118'},\n", " {'url': 'https://www.davidrevoy.com/article771/podcasts-interviews',\n", " 'post_id': '104015325174748490'},\n", " {'url': 'https://www.davidrevoy.com/article769/peppercarrot-the-transcript-feature',\n", " 'post_id': '103952974357764046'},\n", " {'url': 'https://www.davidrevoy.com/article768/the-red-noses-icc-issue',\n", " 'post_id': '103923024665644017'},\n", " {'url': 'https://www.davidrevoy.com/article763/gnuess',\n", " 'post_id': '103823751745548665'},\n", " {'url': 'https://www.davidrevoy.com/article762/libreplanet-livestreaming',\n", " 'post_id': '103818529205795782'},\n", " {'url': 'https://www.davidrevoy.com/article761/',\n", " 'post_id': '103805950030605819'},\n", " {'url': 'https://www.davidrevoy.com/article758/pepper-cosplay-by-duda',\n", " 'post_id': '103753228138354651'},\n", " {'url': 'https://www.davidrevoy.com/article757/the-english-book-printed-project-production-report-3',\n", " 'post_id': '103737664368948232'},\n", " {'url': 'https://www.davidrevoy.com/article753/hibiki',\n", " 'post_id': '103454373940011446'},\n", " {'url': 'https://www.davidrevoy.com/article748/production-report-episode-31',\n", " 'post_id': '103290607628200546'},\n", " {'url': 'https://www.davidrevoy.com/article747/',\n", " 'post_id': '103260574312477487'},\n", " {'url': 'https://www.davidrevoy.com/article744/',\n", " 'post_id': '103205095102755900'},\n", " {'url': 'https://www.davidrevoy.com/article745/',\n", " 'post_id': '103102970055521588'},\n", " {'url': 'https://www.davidrevoy.com/article744/',\n", " 'post_id': '103092305378303336'},\n", " {'url': 'https://www.davidrevoy.com/article742/',\n", " 'post_id': '103023174583391736'},\n", " {'url': 'https://www.davidrevoy.com/article737/',\n", " 'post_id': '102983703190725028'},\n", " {'url': 'https://www.davidrevoy.com/article735/',\n", " 'post_id': '102909961904867774'},\n", " {'url': 'https://www.davidrevoy.com/article734/',\n", " 'post_id': '102869919707456243'},\n", " {'url': 'https://davidrevoy.com/article730/',\n", " 'post_id': '102700720311975359'},\n", " {'url': 'https://www.davidrevoy.com/article729/',\n", " 'post_id': '102666792169549277'},\n", " {'url': 'https://www.davidrevoy.com/article728/',\n", " 'post_id': '102654741402653608'},\n", " {'url': 'https://www.davidrevoy.com/article727/',\n", " 'post_id': '102593801400725423'},\n", " {'url': 'https://www.davidrevoy.com/article724/',\n", " 'post_id': '102452367323111253'},\n", " {'url': 'https://www.davidrevoy.com/article722/',\n", " 'post_id': '102220229320800640'},\n", " {'url': 'https://www.davidrevoy.com/article720/',\n", " 'post_id': '102073090978665899'},\n", " {'url': 'https://www.davidrevoy.com/article719/',\n", " 'post_id': '102066884894569657'},\n", " {'url': 'https://www.davidrevoy.com/article718/',\n", " 'post_id': '102054723253650859'},\n", " {'url': 'https://www.davidrevoy.com/article715/',\n", " 'post_id': '101925516779973717'},\n", " {'url': 'https://www.davidrevoy.com/article712/',\n", " 'post_id': '101869296748625625'},\n", " {'url': 'https://www.davidrevoy.com/article711/',\n", " 'post_id': '101850185146929117'},\n", " {'url': 'https://www.davidrevoy.com/article710/',\n", " 'post_id': '101834696498681828'},\n", " {'url': 'https://www.davidrevoy.com/article706/',\n", " 'post_id': '101755679205239325'},\n", " {'url': 'https://www.davidrevoy.com/article705/',\n", " 'post_id': '101753519779352618'},\n", " {'url': 'https://www.davidrevoy.com/article703/',\n", " 'post_id': '101676341635583173'},\n", " {'url': 'https://www.davidrevoy.com/article702/',\n", " 'post_id': '101664728147168988'},\n", " {'url': 'https://www.davidrevoy.com/article701',\n", " 'post_id': '101654206567352308'},\n", " {'url': 'https://www.davidrevoy.com/article343/krita-alternate-themes',\n", " 'post_id': '101359484724284082'},\n", " {'url': 'http://www.davidrevoy.com/article342/brush-preset-duo-bundle-for-krita',\n", " 'post_id': '101047403174289956'},\n", " {'url': 'http://www.davidrevoy.com/article341/review-purism-librem13-laptop',\n", " 'post_id': '99891807404684128'},\n", " {'url': 'http://www.davidrevoy.com/article339/what-is-the-best-krita-cursor',\n", " 'post_id': '99292231300298224'},\n", " {'url': 'http://www.davidrevoy.com/article335/krita-digital-painting-courses-at-university-cergy-pontoise',\n", " 'post_id': '18347526'},\n", " {'url': 'http://www.davidrevoy.com/article334/making-of-episode-23',\n", " 'post_id': '16906617'}]" ] }, "execution_count": 53, "metadata": {}, "output_type": "execute_result" } ], "source": [ "\n", "all_articles" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import re\n", "for art in all_articles:\n", " re.match()" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.4" }, "orig_nbformat": 4 }, "nbformat": 4, "nbformat_minor": 2 }