{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Text tokenization\n", "\n", "This section contains code that will tokenize the transcription data and add new columns to the data frames for each transcription dataset.\n", "\n", "First, we run the definitions step from the previous section." ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "%run 02_definitions.ipynb" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Load each transcription dataset into a data frame\n", "The `load_csv` function will read the data from each path constant and store data in a Pandas data frame." ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "a = load_csv(ANTHONY)\n", "c = load_csv(CATT)\n", "s = load_csv(STANTON)\n", "t = load_csv(TERRELL)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Optional: Preview the first five lines of a loaded dataset" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### First five lines for Anthony dataset" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
CampaignProjectItemItemIdAssetAssetIdAssetStatusDownloadUrlTranscriptionTags
0Susan B. Anthony PapersSpeeches and other writingsSusan B. Anthony Papers: Speeches and Writings...mss11049038mss11049038-1179295completedhttp://tile.loc.gov/image-services/iiif/servic...Susan B. Anthony SPEECHES AND WRITINGS FI...May 1852
1Susan B. Anthony PapersSpeeches and other writingsSusan B. Anthony Papers: Speeches and Writings...mss11049038mss11049038-2179296completedhttp://tile.loc.gov/image-services/iiif/servic.../52\\r\\nS.B.A-\\r\\n\\r\\nDelivered for the\\r\\nFirs...NaN
2Susan B. Anthony PapersSpeeches and other writingsSusan B. Anthony Papers: Speeches and Writings...mss11049038mss11049038-3179297completedhttp://tile.loc.gov/image-services/iiif/servic...will the best & wisest of mothers continue\\r\\n...temperance
3Susan B. Anthony PapersSpeeches and other writingsSusan B. Anthony Papers: Speeches and Writings...mss11049038mss11049038-4179298completedhttp://tile.loc.gov/image-services/iiif/servic...[Mind] the youthful mind. Of how\\r\\nlittle av...temperance
4Susan B. Anthony PapersSpeeches and other writingsSusan B. Anthony Papers: Speeches and Writings...mss11049038mss11049038-5179299completedhttp://tile.loc.gov/image-services/iiif/servic...x\\r\\nWhile we labor to reclaim one generation ...temperance
\n", "
" ], "text/plain": [ " Campaign Project \\\n", "0 Susan B. Anthony Papers Speeches and other writings \n", "1 Susan B. Anthony Papers Speeches and other writings \n", "2 Susan B. Anthony Papers Speeches and other writings \n", "3 Susan B. Anthony Papers Speeches and other writings \n", "4 Susan B. Anthony Papers Speeches and other writings \n", "\n", " Item ItemId \\\n", "0 Susan B. Anthony Papers: Speeches and Writings... mss11049038 \n", "1 Susan B. Anthony Papers: Speeches and Writings... mss11049038 \n", "2 Susan B. Anthony Papers: Speeches and Writings... mss11049038 \n", "3 Susan B. Anthony Papers: Speeches and Writings... mss11049038 \n", "4 Susan B. Anthony Papers: Speeches and Writings... mss11049038 \n", "\n", " Asset AssetId AssetStatus \\\n", "0 mss11049038-1 179295 completed \n", "1 mss11049038-2 179296 completed \n", "2 mss11049038-3 179297 completed \n", "3 mss11049038-4 179298 completed \n", "4 mss11049038-5 179299 completed \n", "\n", " DownloadUrl \\\n", "0 http://tile.loc.gov/image-services/iiif/servic... \n", "1 http://tile.loc.gov/image-services/iiif/servic... \n", "2 http://tile.loc.gov/image-services/iiif/servic... \n", "3 http://tile.loc.gov/image-services/iiif/servic... \n", "4 http://tile.loc.gov/image-services/iiif/servic... \n", "\n", " Transcription Tags \n", "0 Susan B. Anthony SPEECHES AND WRITINGS FI... May 1852 \n", "1 /52\\r\\nS.B.A-\\r\\n\\r\\nDelivered for the\\r\\nFirs... NaN \n", "2 will the best & wisest of mothers continue\\r\\n... temperance \n", "3 [Mind] the youthful mind. Of how\\r\\nlittle av... temperance \n", "4 x\\r\\nWhile we labor to reclaim one generation ... temperance " ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "a.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### First five lines for Catt dataset" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
CampaignProjectItemItemIdAssetAssetIdAssetStatusDownloadUrlTranscriptionTags
0Carrie Chapman Catt PapersSpeeches and articlesCarrie Chapman Catt Papers: Speech and Article...mss154040385mss154040385-1189284completedhttp://tile.loc.gov/image-services/iiif/servic...CATT, Carrie Chapman\\r\\nSPEECH, ARTICLE, BOOK ...NaN
1Carrie Chapman Catt PapersSpeeches and articlesCarrie Chapman Catt Papers: Speech and Article...mss154040385mss154040385-2189285completedhttp://tile.loc.gov/image-services/iiif/servic...-2-\\r\\nWe appeal in the name of our foremother...NaN
2Carrie Chapman Catt PapersSpeeches and articlesCarrie Chapman Catt Papers: Speech and Article...mss154040385mss154040385-3189286completedhttp://tile.loc.gov/image-services/iiif/servic...AN APPEAL FOR LIBERTY. 1915\\r\\n\\r\\nBy Carri...NaN
3Carrie Chapman Catt PapersSpeeches and articlesCarrie Chapman Catt Papers: Speech and Article...mss154040386mss154040386-1189287completedhttp://tile.loc.gov/image-services/iiif/servic...CATT, Carrie Chapman\\r\\nSPEECH, ARTICLE, BOOK ...NaN
4Carrie Chapman Catt PapersSpeeches and articlesCarrie Chapman Catt Papers: Speech and Article...mss154040386mss154040386-2189288completedhttp://tile.loc.gov/image-services/iiif/servic...The \\r\\nWoman Citizen\\r\\nA WEEKLY CHRONICLE OF...NaN
\n", "
" ], "text/plain": [ " Campaign Project \\\n", "0 Carrie Chapman Catt Papers Speeches and articles \n", "1 Carrie Chapman Catt Papers Speeches and articles \n", "2 Carrie Chapman Catt Papers Speeches and articles \n", "3 Carrie Chapman Catt Papers Speeches and articles \n", "4 Carrie Chapman Catt Papers Speeches and articles \n", "\n", " Item ItemId \\\n", "0 Carrie Chapman Catt Papers: Speech and Article... mss154040385 \n", "1 Carrie Chapman Catt Papers: Speech and Article... mss154040385 \n", "2 Carrie Chapman Catt Papers: Speech and Article... mss154040385 \n", "3 Carrie Chapman Catt Papers: Speech and Article... mss154040386 \n", "4 Carrie Chapman Catt Papers: Speech and Article... mss154040386 \n", "\n", " Asset AssetId AssetStatus \\\n", "0 mss154040385-1 189284 completed \n", "1 mss154040385-2 189285 completed \n", "2 mss154040385-3 189286 completed \n", "3 mss154040386-1 189287 completed \n", "4 mss154040386-2 189288 completed \n", "\n", " DownloadUrl \\\n", "0 http://tile.loc.gov/image-services/iiif/servic... \n", "1 http://tile.loc.gov/image-services/iiif/servic... \n", "2 http://tile.loc.gov/image-services/iiif/servic... \n", "3 http://tile.loc.gov/image-services/iiif/servic... \n", "4 http://tile.loc.gov/image-services/iiif/servic... \n", "\n", " Transcription Tags \n", "0 CATT, Carrie Chapman\\r\\nSPEECH, ARTICLE, BOOK ... NaN \n", "1 -2-\\r\\nWe appeal in the name of our foremother... NaN \n", "2 AN APPEAL FOR LIBERTY. 1915\\r\\n\\r\\nBy Carri... NaN \n", "3 CATT, Carrie Chapman\\r\\nSPEECH, ARTICLE, BOOK ... NaN \n", "4 The \\r\\nWoman Citizen\\r\\nA WEEKLY CHRONICLE OF... NaN " ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "c.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### First five lines for Stanton dataset" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
CampaignProjectItemItemIdAssetAssetIdAssetStatusDownloadUrlTranscriptionTags
0Elizabeth Cady Stanton PapersGeneral correspondenceElizabeth Cady Stanton Papers: General Corresp...mss412100001mss412100001-1179712completedhttp://tile.loc.gov/image-services/iiif/servic...Elizabeth Cady Stanton GENERAL CORRESPONDENCE...NaN
1Elizabeth Cady Stanton PapersGeneral correspondenceElizabeth Cady Stanton Papers: General Corresp...mss412100001mss412100001-2179713completedhttp://tile.loc.gov/image-services/iiif/servic...The following four letters are \\r\\nfrom Daniel...Peter Smith; Daniel Cady; Judge Cady
2Elizabeth Cady Stanton PapersGeneral correspondenceElizabeth Cady Stanton Papers: General Corresp...mss412100001mss412100001-3179714completedhttp://tile.loc.gov/image-services/iiif/servic...22 ...NaN
3Elizabeth Cady Stanton PapersGeneral correspondenceElizabeth Cady Stanton Papers: General Corresp...mss412100001mss412100001-4179715completedhttp://tile.loc.gov/image-services/iiif/servic...he could to make her respectable & happy. That...Peter Smith; Bonaparte
4Elizabeth Cady Stanton PapersGeneral correspondenceElizabeth Cady Stanton Papers: General Corresp...mss412100001mss412100001-5179716completedhttp://tile.loc.gov/image-services/iiif/servic...Johnstown 2 D Paid 10\\r\\n\\r\\n\\r\\nPeter Smi...Peter Smith
\n", "
" ], "text/plain": [ " Campaign Project \\\n", "0 Elizabeth Cady Stanton Papers General correspondence \n", "1 Elizabeth Cady Stanton Papers General correspondence \n", "2 Elizabeth Cady Stanton Papers General correspondence \n", "3 Elizabeth Cady Stanton Papers General correspondence \n", "4 Elizabeth Cady Stanton Papers General correspondence \n", "\n", " Item ItemId \\\n", "0 Elizabeth Cady Stanton Papers: General Corresp... mss412100001 \n", "1 Elizabeth Cady Stanton Papers: General Corresp... mss412100001 \n", "2 Elizabeth Cady Stanton Papers: General Corresp... mss412100001 \n", "3 Elizabeth Cady Stanton Papers: General Corresp... mss412100001 \n", "4 Elizabeth Cady Stanton Papers: General Corresp... mss412100001 \n", "\n", " Asset AssetId AssetStatus \\\n", "0 mss412100001-1 179712 completed \n", "1 mss412100001-2 179713 completed \n", "2 mss412100001-3 179714 completed \n", "3 mss412100001-4 179715 completed \n", "4 mss412100001-5 179716 completed \n", "\n", " DownloadUrl \\\n", "0 http://tile.loc.gov/image-services/iiif/servic... \n", "1 http://tile.loc.gov/image-services/iiif/servic... \n", "2 http://tile.loc.gov/image-services/iiif/servic... \n", "3 http://tile.loc.gov/image-services/iiif/servic... \n", "4 http://tile.loc.gov/image-services/iiif/servic... \n", "\n", " Transcription \\\n", "0 Elizabeth Cady Stanton GENERAL CORRESPONDENCE... \n", "1 The following four letters are \\r\\nfrom Daniel... \n", "2 22 ... \n", "3 he could to make her respectable & happy. That... \n", "4 Johnstown 2 D Paid 10\\r\\n\\r\\n\\r\\nPeter Smi... 
\n", "\n", " Tags \n", "0 NaN \n", "1 Peter Smith; Daniel Cady; Judge Cady \n", "2 NaN \n", "3 Peter Smith; Bonaparte \n", "4 Peter Smith " ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "s.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### First five lines for Terrell dataset" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
CampaignProjectItemItemIdAssetAssetIdAssetStatusDownloadUrlTranscriptionTags
0Mary Church Terrell: Advocate for African Amer...Address and appointment booksMary Church Terrell Papers: Appointment Calend...mss425490014mss425490014-17580completedhttp://tile.loc.gov/image-services/iiif/servic...Office Supplies typewriter ribbons fountain pe...Mrs Ella Wheeler Wilcox; Woman Suffrage Conven...
1Mary Church Terrell: Advocate for African Amer...Address and appointment booksMary Church Terrell Papers: Appointment Calend...mss425490014mss425490014-27581completedhttp://tile.loc.gov/image-services/iiif/servic...March 16, Wednesday,1904 - Dr. Booker Washingt...Cruger; Calloway; VanRensselaer; Booker; Washi...
2Mary Church Terrell: Advocate for African Amer...Address and appointment booksMary Church Terrell Papers: Appointment Calend...mss425490014mss425490014-37582completedhttp://tile.loc.gov/image-services/iiif/servic...Fountain Pens Repaired\\r\\nTablets\\r\\nTypewrite...Pennsylvania; committee; Washington Post
3Mary Church Terrell: Advocate for African Amer...Address and appointment booksMary Church Terrell Papers: Appointment Calend...mss425490014mss425490014-47583completedhttp://tile.loc.gov/image-services/iiif/servic...May, 1904\\r\\n\\r\\n1 SUNDAY Received invitation ...NaN
4Mary Church Terrell: Advocate for African Amer...Address and appointment booksMary Church Terrell Papers: Appointment Calend...mss425490014mss425490014-57584completedhttp://tile.loc.gov/image-services/iiif/servic...June, 1904\\r\\n\\r\\n7 TUESDAY Reached Bremer Hav...Berlin; Congress morning; June 1904; Paris
\n", "
" ], "text/plain": [ " Campaign \\\n", "0 Mary Church Terrell: Advocate for African Amer... \n", "1 Mary Church Terrell: Advocate for African Amer... \n", "2 Mary Church Terrell: Advocate for African Amer... \n", "3 Mary Church Terrell: Advocate for African Amer... \n", "4 Mary Church Terrell: Advocate for African Amer... \n", "\n", " Project \\\n", "0 Address and appointment books \n", "1 Address and appointment books \n", "2 Address and appointment books \n", "3 Address and appointment books \n", "4 Address and appointment books \n", "\n", " Item ItemId \\\n", "0 Mary Church Terrell Papers: Appointment Calend... mss425490014 \n", "1 Mary Church Terrell Papers: Appointment Calend... mss425490014 \n", "2 Mary Church Terrell Papers: Appointment Calend... mss425490014 \n", "3 Mary Church Terrell Papers: Appointment Calend... mss425490014 \n", "4 Mary Church Terrell Papers: Appointment Calend... mss425490014 \n", "\n", " Asset AssetId AssetStatus \\\n", "0 mss425490014-1 7580 completed \n", "1 mss425490014-2 7581 completed \n", "2 mss425490014-3 7582 completed \n", "3 mss425490014-4 7583 completed \n", "4 mss425490014-5 7584 completed \n", "\n", " DownloadUrl \\\n", "0 http://tile.loc.gov/image-services/iiif/servic... \n", "1 http://tile.loc.gov/image-services/iiif/servic... \n", "2 http://tile.loc.gov/image-services/iiif/servic... \n", "3 http://tile.loc.gov/image-services/iiif/servic... \n", "4 http://tile.loc.gov/image-services/iiif/servic... \n", "\n", " Transcription \\\n", "0 Office Supplies typewriter ribbons fountain pe... \n", "1 March 16, Wednesday,1904 - Dr. Booker Washingt... \n", "2 Fountain Pens Repaired\\r\\nTablets\\r\\nTypewrite... \n", "3 May, 1904\\r\\n\\r\\n1 SUNDAY Received invitation ... \n", "4 June, 1904\\r\\n\\r\\n7 TUESDAY Reached Bremer Hav... \n", "\n", " Tags \n", "0 Mrs Ella Wheeler Wilcox; Woman Suffrage Conven... \n", "1 Cruger; Calloway; VanRensselaer; Booker; Washi... 
\n", "2 Pennsylvania; committee; Washington Post \n", "3 NaN \n", "4 Berlin; Congress morning; June 1904; Paris " ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "t.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Create a new column containing the output of the `tokens` function\n", "The `tokens` function uses the previously loaded spaCy model to analyze each word in the transcription. This results in several values for each word, including the lemma, the part-of-speech tag, the shape of the word, and whether it is a stop word or number." ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Tokenizing text for dataset: Susan B. Anthony Papers\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Tokenizing text for dataset: Carrie Chapman Catt Papers\n", "Tokenizing text for dataset: Elizabeth Cady Stanton Papers\n", "Tokenizing text for dataset: Mary Church Terrell: Advocate for African Americans and Women\n", "Done!\n" ] } ], "source": [ "# NOTE: This will take a while to run\n", "for dataset in [a, c, s, t]:\n", " print(f\"Tokenizing text for dataset: {dataset['Campaign'][0]}\")\n", " dataset['tokenized_text'] = dataset['Transcription'].apply(tokens)\n", "print(\"Done!\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Create a new column containing the output of the `entities` function\n", "The `entities` function uses the previously loaded spaCy model to identify persons, places, organizations, etc." ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Identifying entities for dataset: Susan B. 
Anthony Papers\n", "Identifying entities for dataset: Carrie Chapman Catt Papers\n", "Identifying entities for dataset: Elizabeth Cady Stanton Papers\n", "Identifying entities for dataset: Mary Church Terrell: Advocate for African Americans and Women\n", "Done!\n" ] } ], "source": [ "# NOTE: This will take a while to run\n", "for dataset in [a, c, s, t]:\n", " print(f\"Identifying entities for dataset: {dataset['Campaign'][0]}\")\n", " dataset['entities'] = dataset['Transcription'].apply(entities)\n", "print(\"Done!\")" ] }, { "cell_type": "markdown", "metadata": { "jp-MarkdownHeadingCollapsed": true }, "source": [ "### Optional: Preview the results of the `entities` function for the first row of a dataset" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Entities for first row in Anthony dataset" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
TextEntity
0Susan B. Anthony SPEECHESPERSON
1WRITINGS FILE DeliveredORG
2firstORDINAL
3BataviaGPE
42CARDINAL
5May 1852DATE
61852DATE
\n", "
" ], "text/plain": [ " Text Entity\n", "0 Susan B. Anthony SPEECHES PERSON\n", "1 WRITINGS FILE Delivered ORG\n", "2 first ORDINAL\n", "3 Batavia GPE\n", "4 2 CARDINAL\n", "5 May 1852 DATE\n", "6 1852 DATE" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pd.DataFrame([{\"Text\": row[0], \"Entity\": row[3]} for row in a['entities'].iloc[0]]).head(10)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Entities for first row in Catt dataset" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
TextEntity
0CATTPERSON
1Carrie Chapman\\r\\nSPEECHPERSON
2ARTICLE, BOOK FILE\\r\\nSpeechLAW
\n", "
" ], "text/plain": [ " Text Entity\n", "0 CATT PERSON\n", "1 Carrie Chapman\\r\\nSPEECH PERSON\n", "2 ARTICLE, BOOK FILE\\r\\nSpeech LAW" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pd.DataFrame([{\"Text\": row[0], \"Entity\": row[3]} for row in c['entities'].iloc[0]]).head(10)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Entities for first row in Stanton dataset" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
TextEntity
0Elizabeth Cady StantonPERSON
11814 - 49DATE
\n", "
" ], "text/plain": [ " Text Entity\n", "0 Elizabeth Cady Stanton PERSON\n", "1 1814 - 49 DATE" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pd.DataFrame([{\"Text\": row[0], \"Entity\": row[3]} for row in s['entities'].iloc[0]]).head(10)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Entities for first row in Terrell dataset" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
TextEntity
0SwettPERSON
1Stationery Blank BooksORG
2P SwettPERSON
3February, 1904DATE
4178CARDINAL
5Monday\\r\\n2 Tuesday\\r\\n3DATE
6Wednesday\\r\\n4DATE
7Thursday \\r\\n5 Friday\\r\\n6DATE
8Crandall AssociationORG
97:30\\r\\nSpecialTIME
\n", "
" ], "text/plain": [ " Text Entity\n", "0 Swett PERSON\n", "1 Stationery Blank Books ORG\n", "2 P Swett PERSON\n", "3 February, 1904 DATE\n", "4 178 CARDINAL\n", "5 Monday\\r\\n2 Tuesday\\r\\n3 DATE\n", "6 Wednesday\\r\\n4 DATE\n", "7 Thursday \\r\\n5 Friday\\r\\n6 DATE\n", "8 Crandall Association ORG\n", "9 7:30\\r\\nSpecial TIME" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pd.DataFrame([{\"Text\": row[0], \"Entity\": row[3]} for row in t['entities'].iloc[0]]).head(10)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Run the `separate_text` function to isolate tokens by category\n", "\n", "The `separate_text` function uses labels generated by the `spaCy` library to organize the contents of each transcription into actual text, stop words (conjunctions, prepositions, etc.), non-alphanumeric strings (punctuation, whitespace, etc.), numbers, and ambiguous words (when a transcriber cannot make out a word or character, a `?` will be used for the unknown character(s); this is reflected in the analyzed pattern of the word which is used to remove these words from the text category)." ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Organizing tokens by category for: Susan B. 
Anthony Papers\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Organizing tokens by category for: Carrie Chapman Catt Papers\n", "Organizing tokens by category for: Elizabeth Cady Stanton Papers\n", "Organizing tokens by category for: Mary Church Terrell: Advocate for African Americans and Women\n", "Done!\n" ] } ], "source": [ "# Run the separate_text function on each of the four data frames\n", "for dataset in [a, c, s, t]:\n", " print(f\"Organizing tokens by category for: {dataset['Campaign'][0]}\")\n", " separate_text(dataset)\n", "print(\"Done!\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Cache the result for next steps" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [], "source": [ "for dataset in [a, c, s, t]:\n", " write_cache(dataset, str(dataset['Campaign'][0]))" ] }, { "cell_type": "markdown", "metadata": { "jp-MarkdownHeadingCollapsed": true }, "source": [ "### Optional: Preview the results for the first five rows of the updated data frame" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### First five rows of updated Anthony dataset" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
CampaignProjectItemItemIdAssetAssetIdAssetStatusDownloadUrlTranscriptionTagstokenized_textentitiestextstop_wordsnonalphanumsnumbersambigsprocessed_text
0Susan B. Anthony PapersSpeeches and other writingsSusan B. Anthony Papers: Speeches and Writings...mss11049038mss11049038-1179295completedhttp://tile.loc.gov/image-services/iiif/servic...Susan B. Anthony SPEECHES AND WRITINGS FI...May 1852[(Susan, Susan, PROPN, NNP, compound, Xxxxx, T...[(Susan B. Anthony SPEECHES, 0, 30, PERSO...[(Susan, Susan, PROPN, NNP, compound, Xxxxx, T...[(AND, and, CCONJ, CC, cc, XXX, True, True), (...[( , , SPACE, _SP, dep, , False, ...[(2, 2, NUM, CD, nummod, d, False, False), (18...[][susan, b., anthony, speeches, writing, file, ...
1Susan B. Anthony PapersSpeeches and other writingsSusan B. Anthony Papers: Speeches and Writings...mss11049038mss11049038-2179296completedhttp://tile.loc.gov/image-services/iiif/servic.../52\\r\\nS.B.A-\\r\\n\\r\\nDelivered for the\\r\\nFirs...NaN[(/52, /52, PROPN, NNP, punct, /dd, False, Fal...[(Batavia, 44, 51, GPE), (N.J., 52, 56, GPE), ...[(/52, /52, PROPN, NNP, punct, /dd, False, Fal...[(for, for, ADP, IN, prep, xxx, True, True), (...[(\\r\\n, \\r\\n, SPACE, _SP, dep, \\r\\n, False, Fa...[(1852, 1852, NUM, CD, nummod, dddd, False, Fa...[][/52, s.b.a-, deliver, batavia, n.j., company,...
2Susan B. Anthony PapersSpeeches and other writingsSusan B. Anthony Papers: Speeches and Writings...mss11049038mss11049038-3179297completedhttp://tile.loc.gov/image-services/iiif/servic...will the best & wisest of mothers continue\\r\\n...temperance[(will, will, AUX, MD, aux, xxxx, True, True),...[(the\\r\\nSociety, 295, 307, ORG), (two, 324, 3...[(best, good, ADJ, JJS, nsubj, xxxx, True, Fal...[(will, will, AUX, MD, aux, xxxx, True, True),...[(&, &, CCONJ, CC, cc, &, False, False), (\\r\\n...[][][good, wise, mother, continue, son, fall, vict...
3Susan B. Anthony PapersSpeeches and other writingsSusan B. Anthony Papers: Speeches and Writings...mss11049038mss11049038-4179298completedhttp://tile.loc.gov/image-services/iiif/servic...[Mind] the youthful mind. Of how\\r\\nlittle av...temperance[([, [, X, XX, dep, [, False, False), (Mind, m...[(christian, 77, 86, NORP), (truth & sobernes...[(Mind, mind, VERB, VB, dep, Xxxx, True, False...[(the, the, DET, DT, det, xxx, True, True), (O...[([, [, X, XX, dep, [, False, False), (], ], X...[][][mind, youthful, mind, little, avail, untire, ...
4Susan B. Anthony PapersSpeeches and other writingsSusan B. Anthony Papers: Speeches and Writings...mss11049038mss11049038-5179299completedhttp://tile.loc.gov/image-services/iiif/servic...x\\r\\nWhile we labor to reclaim one generation ...temperance[(x, x, ADP, IN, punct, x, True, False), (\\r\\n...[(one, 29, 32, CARDINAL), (Legislature, 145, 1...[(x, x, ADP, IN, punct, x, True, False), (labo...[(While, while, SCONJ, IN, mark, Xxxxx, True, ...[(\\r\\n, \\r\\n, SPACE, _SP, dep, \\r\\n, False, Fa...[][][x, labor, reclaim, generation, drunkard, rise...
\n", "
" ], "text/plain": [ " Campaign Project \\\n", "0 Susan B. Anthony Papers Speeches and other writings \n", "1 Susan B. Anthony Papers Speeches and other writings \n", "2 Susan B. Anthony Papers Speeches and other writings \n", "3 Susan B. Anthony Papers Speeches and other writings \n", "4 Susan B. Anthony Papers Speeches and other writings \n", "\n", " Item ItemId \\\n", "0 Susan B. Anthony Papers: Speeches and Writings... mss11049038 \n", "1 Susan B. Anthony Papers: Speeches and Writings... mss11049038 \n", "2 Susan B. Anthony Papers: Speeches and Writings... mss11049038 \n", "3 Susan B. Anthony Papers: Speeches and Writings... mss11049038 \n", "4 Susan B. Anthony Papers: Speeches and Writings... mss11049038 \n", "\n", " Asset AssetId AssetStatus \\\n", "0 mss11049038-1 179295 completed \n", "1 mss11049038-2 179296 completed \n", "2 mss11049038-3 179297 completed \n", "3 mss11049038-4 179298 completed \n", "4 mss11049038-5 179299 completed \n", "\n", " DownloadUrl \\\n", "0 http://tile.loc.gov/image-services/iiif/servic... \n", "1 http://tile.loc.gov/image-services/iiif/servic... \n", "2 http://tile.loc.gov/image-services/iiif/servic... \n", "3 http://tile.loc.gov/image-services/iiif/servic... \n", "4 http://tile.loc.gov/image-services/iiif/servic... \n", "\n", " Transcription Tags \\\n", "0 Susan B. Anthony SPEECHES AND WRITINGS FI... May 1852 \n", "1 /52\\r\\nS.B.A-\\r\\n\\r\\nDelivered for the\\r\\nFirs... NaN \n", "2 will the best & wisest of mothers continue\\r\\n... temperance \n", "3 [Mind] the youthful mind. Of how\\r\\nlittle av... temperance \n", "4 x\\r\\nWhile we labor to reclaim one generation ... temperance \n", "\n", " tokenized_text \\\n", "0 [(Susan, Susan, PROPN, NNP, compound, Xxxxx, T... \n", "1 [(/52, /52, PROPN, NNP, punct, /dd, False, Fal... \n", "2 [(will, will, AUX, MD, aux, xxxx, True, True),... \n", "3 [([, [, X, XX, dep, [, False, False), (Mind, m... \n", "4 [(x, x, ADP, IN, punct, x, True, False), (\\r\\n... 
\n", "\n", " entities \\\n", "0 [(Susan B. Anthony SPEECHES, 0, 30, PERSO... \n", "1 [(Batavia, 44, 51, GPE), (N.J., 52, 56, GPE), ... \n", "2 [(the\\r\\nSociety, 295, 307, ORG), (two, 324, 3... \n", "3 [(christian, 77, 86, NORP), (truth & sobernes... \n", "4 [(one, 29, 32, CARDINAL), (Legislature, 145, 1... \n", "\n", " text \\\n", "0 [(Susan, Susan, PROPN, NNP, compound, Xxxxx, T... \n", "1 [(/52, /52, PROPN, NNP, punct, /dd, False, Fal... \n", "2 [(best, good, ADJ, JJS, nsubj, xxxx, True, Fal... \n", "3 [(Mind, mind, VERB, VB, dep, Xxxx, True, False... \n", "4 [(x, x, ADP, IN, punct, x, True, False), (labo... \n", "\n", " stop_words \\\n", "0 [(AND, and, CCONJ, CC, cc, XXX, True, True), (... \n", "1 [(for, for, ADP, IN, prep, xxx, True, True), (... \n", "2 [(will, will, AUX, MD, aux, xxxx, True, True),... \n", "3 [(the, the, DET, DT, det, xxx, True, True), (O... \n", "4 [(While, while, SCONJ, IN, mark, Xxxxx, True, ... \n", "\n", " nonalphanums \\\n", "0 [( , , SPACE, _SP, dep, , False, ... \n", "1 [(\\r\\n, \\r\\n, SPACE, _SP, dep, \\r\\n, False, Fa... \n", "2 [(&, &, CCONJ, CC, cc, &, False, False), (\\r\\n... \n", "3 [([, [, X, XX, dep, [, False, False), (], ], X... \n", "4 [(\\r\\n, \\r\\n, SPACE, _SP, dep, \\r\\n, False, Fa... \n", "\n", " numbers ambigs \\\n", "0 [(2, 2, NUM, CD, nummod, d, False, False), (18... [] \n", "1 [(1852, 1852, NUM, CD, nummod, dddd, False, Fa... [] \n", "2 [] [] \n", "3 [] [] \n", "4 [] [] \n", "\n", " processed_text \n", "0 [susan, b., anthony, speeches, writing, file, ... \n", "1 [/52, s.b.a-, deliver, batavia, n.j., company,... \n", "2 [good, wise, mother, continue, son, fall, vict... \n", "3 [mind, youthful, mind, little, avail, untire, ... \n", "4 [x, labor, reclaim, generation, drunkard, rise... 
" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "a.iloc[0:5]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### First five rows of updated Catt dataset" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
CampaignProjectItemItemIdAssetAssetIdAssetStatusDownloadUrlTranscriptionTagstokenized_textentitiestextstop_wordsnonalphanumsnumbersambigsprocessed_text
0Carrie Chapman Catt PapersSpeeches and articlesCarrie Chapman Catt Papers: Speech and Article...mss154040385mss154040385-1189284completedhttp://tile.loc.gov/image-services/iiif/servic...CATT, Carrie Chapman\\r\\nSPEECH, ARTICLE, BOOK ...NaN[(CATT, CATT, PROPN, NNP, ROOT, XXXX, True, Fa...[(CATT, 0, 4, PERSON), (Carrie Chapman\\r\\nSPEE...[(CATT, CATT, PROPN, NNP, ROOT, XXXX, True, Fa...[(An, an, DET, DT, det, Xx, True, True), (For,...[(,, ,, PUNCT, ,, punct, ,, False, False), (\\r...[][][catt, carrie, chapman, speech, article, book,...
1Carrie Chapman Catt PapersSpeeches and articlesCarrie Chapman Catt Papers: Speech and Article...mss154040385mss154040385-2189285completedhttp://tile.loc.gov/image-services/iiif/servic...-2-\\r\\nWe appeal in the name of our foremother...NaN[(-2-, -2-, PUNCT, ``, punct, -d-, False, Fals...[(-2-\\r\\n, 0, 5, PERSON), (American, 428, 436,...[(appeal, appeal, VERB, VBP, ccomp, xxxx, True...[(We, we, PRON, PRP, nsubj, Xx, True, True), (...[(-2-, -2-, PUNCT, ``, punct, -d-, False, Fals...[(1,600,000, 1,600,000, NUM, CD, nummod, d,ddd...[][appeal, foremother, forefather, equal, courag...
2Carrie Chapman Catt PapersSpeeches and articlesCarrie Chapman Catt Papers: Speech and Article...mss154040385mss154040385-3189286completedhttp://tile.loc.gov/image-services/iiif/servic...AN APPEAL FOR LIBERTY. 1915\\r\\n\\r\\nBy Carri...NaN[(AN, an, DET, DT, det, XX, True, True), (APPE...[(1915, 26, 30, DATE), (Carrie Chapman Catt, 3...[(APPEAL, APPEAL, PROPN, NNP, ROOT, XXXX, True...[(AN, an, DET, DT, det, XX, True, True), (FOR,...[(., ., PUNCT, ., punct, ., False, False), ( ...[(1915, 1915, NUM, CD, ROOT, dddd, False, Fals...[][appeal, liberty, carrie, chapman, catt, year,...
3Carrie Chapman Catt PapersSpeeches and articlesCarrie Chapman Catt Papers: Speech and Article...mss154040386mss154040386-1189287completedhttp://tile.loc.gov/image-services/iiif/servic...CATT, Carrie Chapman\\r\\nSPEECH, ARTICLE, BOOK ...NaN[(CATT, CATT, PROPN, NNP, ROOT, XXXX, True, Fa...[(CATT, 0, 4, PERSON), (Carrie Chapman\\r\\nSPEE...[(CATT, CATT, PROPN, NNP, ROOT, XXXX, True, Fa...[(Be, be, AUX, VB, ROOT, Xx, True, True)][(,, ,, PUNCT, ,, punct, ,, False, False), (\\r...[][][catt, carrie, chapman, speech, article, book,...
4Carrie Chapman Catt PapersSpeeches and articlesCarrie Chapman Catt Papers: Speech and Article...mss154040386mss154040386-2189288completedhttp://tile.loc.gov/image-services/iiif/servic...The \\r\\nWoman Citizen\\r\\nA WEEKLY CHRONICLE OF...NaN[(The, the, DET, DT, det, Xxx, True, True), (\\...[(Carrie Chapman Catt, 156, 175, PERSON), (Con...[(Woman, Woman, PROPN, NNP, compound, Xxxxx, T...[(The, the, DET, DT, det, Xxx, True, True), (A...[(\\r\\n, \\r\\n, SPACE, _SP, dep, \\r\\n, False, Fa...[(21, 21, NUM, CD, nummod, dd, False, False), ...[][woman, citizen, weekly, chronicle, progress, ...
\n", "
" ], "text/plain": [ " Campaign Project \\\n", "0 Carrie Chapman Catt Papers Speeches and articles \n", "1 Carrie Chapman Catt Papers Speeches and articles \n", "2 Carrie Chapman Catt Papers Speeches and articles \n", "3 Carrie Chapman Catt Papers Speeches and articles \n", "4 Carrie Chapman Catt Papers Speeches and articles \n", "\n", " Item ItemId \\\n", "0 Carrie Chapman Catt Papers: Speech and Article... mss154040385 \n", "1 Carrie Chapman Catt Papers: Speech and Article... mss154040385 \n", "2 Carrie Chapman Catt Papers: Speech and Article... mss154040385 \n", "3 Carrie Chapman Catt Papers: Speech and Article... mss154040386 \n", "4 Carrie Chapman Catt Papers: Speech and Article... mss154040386 \n", "\n", " Asset AssetId AssetStatus \\\n", "0 mss154040385-1 189284 completed \n", "1 mss154040385-2 189285 completed \n", "2 mss154040385-3 189286 completed \n", "3 mss154040386-1 189287 completed \n", "4 mss154040386-2 189288 completed \n", "\n", " DownloadUrl \\\n", "0 http://tile.loc.gov/image-services/iiif/servic... \n", "1 http://tile.loc.gov/image-services/iiif/servic... \n", "2 http://tile.loc.gov/image-services/iiif/servic... \n", "3 http://tile.loc.gov/image-services/iiif/servic... \n", "4 http://tile.loc.gov/image-services/iiif/servic... \n", "\n", " Transcription Tags \\\n", "0 CATT, Carrie Chapman\\r\\nSPEECH, ARTICLE, BOOK ... NaN \n", "1 -2-\\r\\nWe appeal in the name of our foremother... NaN \n", "2 AN APPEAL FOR LIBERTY. 1915\\r\\n\\r\\nBy Carri... NaN \n", "3 CATT, Carrie Chapman\\r\\nSPEECH, ARTICLE, BOOK ... NaN \n", "4 The \\r\\nWoman Citizen\\r\\nA WEEKLY CHRONICLE OF... NaN \n", "\n", " tokenized_text \\\n", "0 [(CATT, CATT, PROPN, NNP, ROOT, XXXX, True, Fa... \n", "1 [(-2-, -2-, PUNCT, ``, punct, -d-, False, Fals... \n", "2 [(AN, an, DET, DT, det, XX, True, True), (APPE... \n", "3 [(CATT, CATT, PROPN, NNP, ROOT, XXXX, True, Fa... \n", "4 [(The, the, DET, DT, det, Xxx, True, True), (\\... 
\n", "\n", " entities \\\n", "0 [(CATT, 0, 4, PERSON), (Carrie Chapman\\r\\nSPEE... \n", "1 [(-2-\\r\\n, 0, 5, PERSON), (American, 428, 436,... \n", "2 [(1915, 26, 30, DATE), (Carrie Chapman Catt, 3... \n", "3 [(CATT, 0, 4, PERSON), (Carrie Chapman\\r\\nSPEE... \n", "4 [(Carrie Chapman Catt, 156, 175, PERSON), (Con... \n", "\n", " text \\\n", "0 [(CATT, CATT, PROPN, NNP, ROOT, XXXX, True, Fa... \n", "1 [(appeal, appeal, VERB, VBP, ccomp, xxxx, True... \n", "2 [(APPEAL, APPEAL, PROPN, NNP, ROOT, XXXX, True... \n", "3 [(CATT, CATT, PROPN, NNP, ROOT, XXXX, True, Fa... \n", "4 [(Woman, Woman, PROPN, NNP, compound, Xxxxx, T... \n", "\n", " stop_words \\\n", "0 [(An, an, DET, DT, det, Xx, True, True), (For,... \n", "1 [(We, we, PRON, PRP, nsubj, Xx, True, True), (... \n", "2 [(AN, an, DET, DT, det, XX, True, True), (FOR,... \n", "3 [(Be, be, AUX, VB, ROOT, Xx, True, True)] \n", "4 [(The, the, DET, DT, det, Xxx, True, True), (A... \n", "\n", " nonalphanums \\\n", "0 [(,, ,, PUNCT, ,, punct, ,, False, False), (\\r... \n", "1 [(-2-, -2-, PUNCT, ``, punct, -d-, False, Fals... \n", "2 [(., ., PUNCT, ., punct, ., False, False), ( ... \n", "3 [(,, ,, PUNCT, ,, punct, ,, False, False), (\\r... \n", "4 [(\\r\\n, \\r\\n, SPACE, _SP, dep, \\r\\n, False, Fa... \n", "\n", " numbers ambigs \\\n", "0 [] [] \n", "1 [(1,600,000, 1,600,000, NUM, CD, nummod, d,ddd... [] \n", "2 [(1915, 1915, NUM, CD, ROOT, dddd, False, Fals... [] \n", "3 [] [] \n", "4 [(21, 21, NUM, CD, nummod, dd, False, False), ... [] \n", "\n", " processed_text \n", "0 [catt, carrie, chapman, speech, article, book,... \n", "1 [appeal, foremother, forefather, equal, courag... \n", "2 [appeal, liberty, carrie, chapman, catt, year,... \n", "3 [catt, carrie, chapman, speech, article, book,... \n", "4 [woman, citizen, weekly, chronicle, progress, ... 
" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "c.iloc[0:5]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### First five rows of updated Stanton dataset" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
CampaignProjectItemItemIdAssetAssetIdAssetStatusDownloadUrlTranscriptionTagstokenized_textentitiestextstop_wordsnonalphanumsnumbersambigsprocessed_text
0Elizabeth Cady Stanton PapersGeneral correspondenceElizabeth Cady Stanton Papers: General Corresp...mss412100001mss412100001-1179712completedhttp://tile.loc.gov/image-services/iiif/servic...Elizabeth Cady Stanton GENERAL CORRESPONDENCE...NaN[(Elizabeth, Elizabeth, PROPN, NNP, compound, ...[(Elizabeth Cady Stanton, 0, 22, PERSON), (181...[(Elizabeth, Elizabeth, PROPN, NNP, compound, ...[][( , , SPACE, _SP, dep, , False, False), (-,...[(1814, 1814, NUM, CD, appos, dddd, False, Fal...[][elizabeth, cady, stanton, general, correspond...
1Elizabeth Cady Stanton PapersGeneral correspondenceElizabeth Cady Stanton Papers: General Corresp...mss412100001mss412100001-2179713completedhttp://tile.loc.gov/image-services/iiif/servic...The following four letters are \\r\\nfrom Daniel...Peter Smith; Daniel Cady; Judge Cady[(The, the, DET, DT, det, Xxx, True, True), (f...[(four, 14, 18, CARDINAL), (Daniel Cady, 38, 4...[(following, follow, VERB, VBG, amod, xxxx, Tr...[(The, the, DET, DT, det, Xxx, True, True), (f...[(\\r\\n, \\r\\n, SPACE, _SP, dep, \\r\\n, False, Fa...[][][follow, letter, daniel, cady, peter, smith, j...
2Elizabeth Cady Stanton PapersGeneral correspondenceElizabeth Cady Stanton Papers: General Corresp...mss412100001mss412100001-3179714completedhttp://tile.loc.gov/image-services/iiif/servic...22 ...NaN[(22, 22, NUM, CD, ROOT, dd, False, False), ( ...[(22, 0, 2, CARDINAL), (2 Dec. 1814, 91, 102, ...[(Dec., Dec., PROPN, NNP, npadvmod, Xxx., Fals...[(It, it, PRON, PRP, nsubj, Xx, True, True), (...[( ...[(22, 22, NUM, CD, ROOT, dd, False, False), (2...[][dec., dear, sir, true, lose, young, child, th...
3Elizabeth Cady Stanton PapersGeneral correspondenceElizabeth Cady Stanton Papers: General Corresp...mss412100001mss412100001-4179715completedhttp://tile.loc.gov/image-services/iiif/servic...he could to make her respectable & happy. That...Peter Smith; Bonaparte[(he, he, PRON, PRP, nsubj, xx, True, True), (...[(one, 467, 470, CARDINAL), (one, 614, 617, CA...[(respectable, respectable, ADJ, JJ, ccomp, xx...[(he, he, PRON, PRP, nsubj, xx, True, True), (...[(&, &, CCONJ, CC, cc, &, False, False), (., ....[(2d, 2d, NUM, CD, nummod, dx, False, False), ...[][respectable, happy, moment, flatter, soon, se...
4Elizabeth Cady Stanton PapersGeneral correspondenceElizabeth Cady Stanton Papers: General Corresp...mss412100001mss412100001-5179716completedhttp://tile.loc.gov/image-services/iiif/servic...Johnstown 2 D Paid 10\\r\\n\\r\\n\\r\\nPeter Smi...Peter Smith[(Johnstown, Johnstown, PROPN, NNP, nmod, Xxxx...[(Johnstown, 0, 9, GPE), (10, 23, 25, CARDINAL...[(Johnstown, Johnstown, PROPN, NNP, nmod, Xxxx...[][( , , SPACE, _SP, dep, , False, Fa...[(2, 2, NUM, CD, nummod, d, False, False), (10...[][johnstown, d, paid, peter, smith, esquire, pe...
\n", "
" ], "text/plain": [ " Campaign Project \\\n", "0 Elizabeth Cady Stanton Papers General correspondence \n", "1 Elizabeth Cady Stanton Papers General correspondence \n", "2 Elizabeth Cady Stanton Papers General correspondence \n", "3 Elizabeth Cady Stanton Papers General correspondence \n", "4 Elizabeth Cady Stanton Papers General correspondence \n", "\n", " Item ItemId \\\n", "0 Elizabeth Cady Stanton Papers: General Corresp... mss412100001 \n", "1 Elizabeth Cady Stanton Papers: General Corresp... mss412100001 \n", "2 Elizabeth Cady Stanton Papers: General Corresp... mss412100001 \n", "3 Elizabeth Cady Stanton Papers: General Corresp... mss412100001 \n", "4 Elizabeth Cady Stanton Papers: General Corresp... mss412100001 \n", "\n", " Asset AssetId AssetStatus \\\n", "0 mss412100001-1 179712 completed \n", "1 mss412100001-2 179713 completed \n", "2 mss412100001-3 179714 completed \n", "3 mss412100001-4 179715 completed \n", "4 mss412100001-5 179716 completed \n", "\n", " DownloadUrl \\\n", "0 http://tile.loc.gov/image-services/iiif/servic... \n", "1 http://tile.loc.gov/image-services/iiif/servic... \n", "2 http://tile.loc.gov/image-services/iiif/servic... \n", "3 http://tile.loc.gov/image-services/iiif/servic... \n", "4 http://tile.loc.gov/image-services/iiif/servic... \n", "\n", " Transcription \\\n", "0 Elizabeth Cady Stanton GENERAL CORRESPONDENCE... \n", "1 The following four letters are \\r\\nfrom Daniel... \n", "2 22 ... \n", "3 he could to make her respectable & happy. That... \n", "4 Johnstown 2 D Paid 10\\r\\n\\r\\n\\r\\nPeter Smi... \n", "\n", " Tags \\\n", "0 NaN \n", "1 Peter Smith; Daniel Cady; Judge Cady \n", "2 NaN \n", "3 Peter Smith; Bonaparte \n", "4 Peter Smith \n", "\n", " tokenized_text \\\n", "0 [(Elizabeth, Elizabeth, PROPN, NNP, compound, ... \n", "1 [(The, the, DET, DT, det, Xxx, True, True), (f... \n", "2 [(22, 22, NUM, CD, ROOT, dd, False, False), ( ... \n", "3 [(he, he, PRON, PRP, nsubj, xx, True, True), (... 
\n", "4 [(Johnstown, Johnstown, PROPN, NNP, nmod, Xxxx... \n", "\n", " entities \\\n", "0 [(Elizabeth Cady Stanton, 0, 22, PERSON), (181... \n", "1 [(four, 14, 18, CARDINAL), (Daniel Cady, 38, 4... \n", "2 [(22, 0, 2, CARDINAL), (2 Dec. 1814, 91, 102, ... \n", "3 [(one, 467, 470, CARDINAL), (one, 614, 617, CA... \n", "4 [(Johnstown, 0, 9, GPE), (10, 23, 25, CARDINAL... \n", "\n", " text \\\n", "0 [(Elizabeth, Elizabeth, PROPN, NNP, compound, ... \n", "1 [(following, follow, VERB, VBG, amod, xxxx, Tr... \n", "2 [(Dec., Dec., PROPN, NNP, npadvmod, Xxx., Fals... \n", "3 [(respectable, respectable, ADJ, JJ, ccomp, xx... \n", "4 [(Johnstown, Johnstown, PROPN, NNP, nmod, Xxxx... \n", "\n", " stop_words \\\n", "0 [] \n", "1 [(The, the, DET, DT, det, Xxx, True, True), (f... \n", "2 [(It, it, PRON, PRP, nsubj, Xx, True, True), (... \n", "3 [(he, he, PRON, PRP, nsubj, xx, True, True), (... \n", "4 [] \n", "\n", " nonalphanums \\\n", "0 [( , , SPACE, _SP, dep, , False, False), (-,... \n", "1 [(\\r\\n, \\r\\n, SPACE, _SP, dep, \\r\\n, False, Fa... \n", "2 [( ... \n", "3 [(&, &, CCONJ, CC, cc, &, False, False), (., .... \n", "4 [( , , SPACE, _SP, dep, , False, Fa... \n", "\n", " numbers ambigs \\\n", "0 [(1814, 1814, NUM, CD, appos, dddd, False, Fal... [] \n", "1 [] [] \n", "2 [(22, 22, NUM, CD, ROOT, dd, False, False), (2... [] \n", "3 [(2d, 2d, NUM, CD, nummod, dx, False, False), ... [] \n", "4 [(2, 2, NUM, CD, nummod, d, False, False), (10... [] \n", "\n", " processed_text \n", "0 [elizabeth, cady, stanton, general, correspond... \n", "1 [follow, letter, daniel, cady, peter, smith, j... \n", "2 [dec., dear, sir, true, lose, young, child, th... \n", "3 [respectable, happy, moment, flatter, soon, se... \n", "4 [johnstown, d, paid, peter, smith, esquire, pe... 
" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "s.iloc[0:5]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### First five rows of updated Terrell dataset" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
CampaignProjectItemItemIdAssetAssetIdAssetStatusDownloadUrlTranscriptionTagstokenized_textentitiestextstop_wordsnonalphanumsnumbersambigsprocessed_text
0Mary Church Terrell: Advocate for African Amer...Address and appointment booksMary Church Terrell Papers: Appointment Calend...mss425490014mss425490014-17580completedhttp://tile.loc.gov/image-services/iiif/servic...Office Supplies typewriter ribbons fountain pe...Mrs Ella Wheeler Wilcox; Woman Suffrage Conven...[(Office, office, NOUN, NN, compound, Xxxxx, T...[(Swett, 101, 106, PERSON), (Stationery Blank ...[(Office, office, NOUN, NN, compound, Xxxxx, T...[(’s, ’s, PART, POS, case, ’x, False, True), (...[(\\r\\n, \\r\\n, SPACE, _SP, dep, \\r\\n, False, Fa...[(603, 603, NUM, CD, nummod, ddd, False, False...[][office, supply, typewriter, ribbon, fountain,...
1Mary Church Terrell: Advocate for African Amer...Address and appointment booksMary Church Terrell Papers: Appointment Calend...mss425490014mss425490014-27581completedhttp://tile.loc.gov/image-services/iiif/servic...March 16, Wednesday,1904 - Dr. Booker Washingt...Cruger; Calloway; VanRensselaer; Booker; Washi...[(March, March, PROPN, NNP, npadvmod, Xxxxx, T...[(March 16, 0, 8, DATE), (Booker, 31, 37, PERS...[(March, March, PROPN, NNP, npadvmod, Xxxxx, T...[(as, as, ADP, IN, prep, xx, True, True), (our...[(,, ,, PUNCT, ,, punct, ,, False, False), (-,...[(16, 16, NUM, CD, nummod, dd, False, False), ...[][march, wednesday,1904, dr., booker, washingto...
2Mary Church Terrell: Advocate for African Amer...Address and appointment booksMary Church Terrell Papers: Appointment Calend...mss425490014mss425490014-37582completedhttp://tile.loc.gov/image-services/iiif/servic...Fountain Pens Repaired\\r\\nTablets\\r\\nTypewrite...Pennsylvania; committee; Washington Post[(Fountain, Fountain, PROPN, NNP, compound, Xx...[(Fountain Pens Repaired\\r\\nTablets\\r\\nTypewri...[(Fountain, Fountain, PROPN, NNP, compound, Xx...[('s, 's, PART, POS, case, 'x, False, True), (...[(\\r\\n, \\r\\n, SPACE, _SP, dep, \\r\\n, False, Fa...[(603, 603, NUM, CD, nummod, ddd, False, False...[(?, ?, ADJ, JJ, punct, ?, False, False), (Wi?...[fountain, pens, repaired, tablet, typewriter,...
3Mary Church Terrell: Advocate for African Amer...Address and appointment booksMary Church Terrell Papers: Appointment Calend...mss425490014mss425490014-47583completedhttp://tile.loc.gov/image-services/iiif/servic...May, 1904\\r\\n\\r\\n1 SUNDAY Received invitation ...NaN[(May, May, PROPN, NNP, nmod, Xxx, True, True)...[(May, 1904, 0, 9, DATE), (1, 13, 14, CARDINAL...[(SUNDAY, SUNDAY, PROPN, NNP, appos, XXXX, Tru...[(May, May, PROPN, NNP, nmod, Xxx, True, True)...[(,, ,, PUNCT, ,, punct, ,, False, False), (\\r...[(1904, 1904, NUM, CD, nummod, dddd, False, Fa...[][sunday, receive, invitation, fran, olga, mr, ...
4Mary Church Terrell: Advocate for African Amer...Address and appointment booksMary Church Terrell Papers: Appointment Calend...mss425490014mss425490014-57584completedhttp://tile.loc.gov/image-services/iiif/servic...June, 1904\\r\\n\\r\\n7 TUESDAY Reached Bremer Hav...Berlin; Congress morning; June 1904; Paris[(June, June, PROPN, NNP, npadvmod, Xxxx, True...[(June, 1904, 0, 10, DATE), (7, 14, 15, CARDIN...[(June, June, PROPN, NNP, npadvmod, Xxxx, True...[(in, in, ADP, IN, prep, xx, True, True), (at,...[(,, ,, PUNCT, ,, punct, ,, False, False), (\\r...[(1904, 1904, NUM, CD, nummod, dddd, False, Fa...[][june, tuesday, reach, bremer, haven, morning,...
\n", "
" ], "text/plain": [ " Campaign \\\n", "0 Mary Church Terrell: Advocate for African Amer... \n", "1 Mary Church Terrell: Advocate for African Amer... \n", "2 Mary Church Terrell: Advocate for African Amer... \n", "3 Mary Church Terrell: Advocate for African Amer... \n", "4 Mary Church Terrell: Advocate for African Amer... \n", "\n", " Project \\\n", "0 Address and appointment books \n", "1 Address and appointment books \n", "2 Address and appointment books \n", "3 Address and appointment books \n", "4 Address and appointment books \n", "\n", " Item ItemId \\\n", "0 Mary Church Terrell Papers: Appointment Calend... mss425490014 \n", "1 Mary Church Terrell Papers: Appointment Calend... mss425490014 \n", "2 Mary Church Terrell Papers: Appointment Calend... mss425490014 \n", "3 Mary Church Terrell Papers: Appointment Calend... mss425490014 \n", "4 Mary Church Terrell Papers: Appointment Calend... mss425490014 \n", "\n", " Asset AssetId AssetStatus \\\n", "0 mss425490014-1 7580 completed \n", "1 mss425490014-2 7581 completed \n", "2 mss425490014-3 7582 completed \n", "3 mss425490014-4 7583 completed \n", "4 mss425490014-5 7584 completed \n", "\n", " DownloadUrl \\\n", "0 http://tile.loc.gov/image-services/iiif/servic... \n", "1 http://tile.loc.gov/image-services/iiif/servic... \n", "2 http://tile.loc.gov/image-services/iiif/servic... \n", "3 http://tile.loc.gov/image-services/iiif/servic... \n", "4 http://tile.loc.gov/image-services/iiif/servic... \n", "\n", " Transcription \\\n", "0 Office Supplies typewriter ribbons fountain pe... \n", "1 March 16, Wednesday,1904 - Dr. Booker Washingt... \n", "2 Fountain Pens Repaired\\r\\nTablets\\r\\nTypewrite... \n", "3 May, 1904\\r\\n\\r\\n1 SUNDAY Received invitation ... \n", "4 June, 1904\\r\\n\\r\\n7 TUESDAY Reached Bremer Hav... \n", "\n", " Tags \\\n", "0 Mrs Ella Wheeler Wilcox; Woman Suffrage Conven... \n", "1 Cruger; Calloway; VanRensselaer; Booker; Washi... 
\n", "2 Pennsylvania; committee; Washington Post \n", "3 NaN \n", "4 Berlin; Congress morning; June 1904; Paris \n", "\n", " tokenized_text \\\n", "0 [(Office, office, NOUN, NN, compound, Xxxxx, T... \n", "1 [(March, March, PROPN, NNP, npadvmod, Xxxxx, T... \n", "2 [(Fountain, Fountain, PROPN, NNP, compound, Xx... \n", "3 [(May, May, PROPN, NNP, nmod, Xxx, True, True)... \n", "4 [(June, June, PROPN, NNP, npadvmod, Xxxx, True... \n", "\n", " entities \\\n", "0 [(Swett, 101, 106, PERSON), (Stationery Blank ... \n", "1 [(March 16, 0, 8, DATE), (Booker, 31, 37, PERS... \n", "2 [(Fountain Pens Repaired\\r\\nTablets\\r\\nTypewri... \n", "3 [(May, 1904, 0, 9, DATE), (1, 13, 14, CARDINAL... \n", "4 [(June, 1904, 0, 10, DATE), (7, 14, 15, CARDIN... \n", "\n", " text \\\n", "0 [(Office, office, NOUN, NN, compound, Xxxxx, T... \n", "1 [(March, March, PROPN, NNP, npadvmod, Xxxxx, T... \n", "2 [(Fountain, Fountain, PROPN, NNP, compound, Xx... \n", "3 [(SUNDAY, SUNDAY, PROPN, NNP, appos, XXXX, Tru... \n", "4 [(June, June, PROPN, NNP, npadvmod, Xxxx, True... \n", "\n", " stop_words \\\n", "0 [(’s, ’s, PART, POS, case, ’x, False, True), (... \n", "1 [(as, as, ADP, IN, prep, xx, True, True), (our... \n", "2 [('s, 's, PART, POS, case, 'x, False, True), (... \n", "3 [(May, May, PROPN, NNP, nmod, Xxx, True, True)... \n", "4 [(in, in, ADP, IN, prep, xx, True, True), (at,... \n", "\n", " nonalphanums \\\n", "0 [(\\r\\n, \\r\\n, SPACE, _SP, dep, \\r\\n, False, Fa... \n", "1 [(,, ,, PUNCT, ,, punct, ,, False, False), (-,... \n", "2 [(\\r\\n, \\r\\n, SPACE, _SP, dep, \\r\\n, False, Fa... \n", "3 [(,, ,, PUNCT, ,, punct, ,, False, False), (\\r... \n", "4 [(,, ,, PUNCT, ,, punct, ,, False, False), (\\r... \n", "\n", " numbers \\\n", "0 [(603, 603, NUM, CD, nummod, ddd, False, False... \n", "1 [(16, 16, NUM, CD, nummod, dd, False, False), ... \n", "2 [(603, 603, NUM, CD, nummod, ddd, False, False... \n", "3 [(1904, 1904, NUM, CD, nummod, dddd, False, Fa... 
\n", "4 [(1904, 1904, NUM, CD, nummod, dddd, False, Fa... \n", "\n", " ambigs \\\n", "0 [] \n", "1 [] \n", "2 [(?, ?, ADJ, JJ, punct, ?, False, False), (Wi?... \n", "3 [] \n", "4 [] \n", "\n", " processed_text \n", "0 [office, supply, typewriter, ribbon, fountain,... \n", "1 [march, wednesday,1904, dr., booker, washingto... \n", "2 [fountain, pens, repaired, tablet, typewriter,... \n", "3 [sunday, receive, invitation, fran, olga, mr, ... \n", "4 [june, tuesday, reach, bremer, haven, morning,... " ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "t.iloc[0:5]" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.5" } }, "nbformat": 4, "nbformat_minor": 4 }