President Words - Ipynb

You might also like

Download as txt, pdf, or txt
Download as txt, pdf, or txt
You are on page 1 of 8

{

"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"- For every president, finding out how many unique words they used in speech
using inaugural address package. \n",
"- Assuming that President Bush means one person"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"from nltk.corpus import inaugural"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"speech_list = inaugural.fileids()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['1789-Washington.txt', '1793-Washington.txt', '1797-Adams.txt', '1801-
Jefferson.txt', '1805-Jefferson.txt', '1809-Madison.txt', '1813-Madison.txt',
'1817-Monroe.txt', '1821-Monroe.txt', '1825-Adams.txt', '1829-Jackson.txt', '1833-
Jackson.txt', '1837-VanBuren.txt', '1841-Harrison.txt', '1845-Polk.txt', '1849-
Taylor.txt', '1853-Pierce.txt', '1857-Buchanan.txt', '1861-Lincoln.txt', '1865-
Lincoln.txt', '1869-Grant.txt', '1873-Grant.txt', '1877-Hayes.txt', '1881-
Garfield.txt', '1885-Cleveland.txt', '1889-Harrison.txt', '1893-Cleveland.txt',
'1897-McKinley.txt', '1901-McKinley.txt', '1905-Roosevelt.txt', '1909-Taft.txt',
'1913-Wilson.txt', '1917-Wilson.txt', '1921-Harding.txt', '1925-Coolidge.txt',
'1929-Hoover.txt', '1933-Roosevelt.txt', '1937-Roosevelt.txt', '1941-
Roosevelt.txt', '1945-Roosevelt.txt', '1949-Truman.txt', '1953-Eisenhower.txt',
'1957-Eisenhower.txt', '1961-Kennedy.txt', '1965-Johnson.txt', '1969-Nixon.txt',
'1973-Nixon.txt', '1977-Carter.txt', '1981-Reagan.txt', '1985-Reagan.txt', '1989-
Bush.txt', '1993-Clinton.txt', '1997-Clinton.txt', '2001-Bush.txt', '2005-
Bush.txt', '2009-Obama.txt']\n"
]
}
],
"source": [
"print(speech_list)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"president_words_list = []"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"for file in speech_list: # Merging the files first!\n",
" name = file[5:-4] \n",
" words = inaugural.words(file)\n",
" if len(president_words_list) == 0:\n",
" president_words_list.append([name, words])\n",
" else: \n",
" for president in president_words_list:\n",
" if name == president[0]: \n",
" president[1] += (words)\n",
" break\n",
" else: \n",
" president_words_list.append([name, words])"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1685\n"
]
}
],
"source": [
"print(len(president_words_list[0][1]))"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"word_count_list = []"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"for president in president_words_list: # Taking unique words.\n",
" name = president[0]\n",
" words = set(president[1])\n",
" word_count_list.append((name, len(words)))"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"The Number of Unique Words Each President Used in Their Speeches:\n",
" [('Washington', 663), ('Adams', 1503), ('Jefferson', 1261), ('Madison',
899), ('Monroe', 1781), ('Jackson', 846), ('VanBuren', 1318), ('Harrison', 2662),
('Polk', 1333), ('Taylor', 499), ('Pierce', 1168), ('Buchanan', 948), ('Lincoln',
1261), ('Grant', 855), ('Hayes', 832), ('Garfield', 1022), ('Cleveland', 1219),
('McKinley', 1682), ('Roosevelt', 1704), ('Taft', 1439), ('Wilson', 996),
('Harding', 1170), ('Coolidge', 1221), ('Hoover', 1087), ('Truman', 781),
('Eisenhower', 1250), ('Kennedy', 570), ('Johnson', 571), ('Nixon', 1037),
('Carter', 529), ('Reagan', 1453), ('Bush', 1578), ('Clinton', 1138), ('Obama',
938)]\n"
]
}
],
"source": [
"print('The Number of Unique Words Each President Used in Their Speeches:\\n',
word_count_list)"
]
},
{
"cell_type": "code",
"execution_count": 156,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import pandas as pd # Creating a simple DataFrame to show the
results"
]
},
{
"cell_type": "code",
"execution_count": 157,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"df = pd.DataFrame(word_count_list, index=range(1,35), columns=['Presidents',
'words count'])"
]
},
{
"cell_type": "code",
"execution_count": 158,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Presidents</th>\n",
" <th>words count</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Washington</td>\n",
" <td>663</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Adams</td>\n",
" <td>1503</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Jefferson</td>\n",
" <td>1261</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Madison</td>\n",
" <td>899</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>Monroe</td>\n",
" <td>1781</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>Jackson</td>\n",
" <td>846</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>VanBuren</td>\n",
" <td>1318</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>Harrison</td>\n",
" <td>2662</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>Polk</td>\n",
" <td>1333</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>Taylor</td>\n",
" <td>499</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>Pierce</td>\n",
" <td>1168</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>Buchanan</td>\n",
" <td>948</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>Lincoln</td>\n",
" <td>1261</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>Grant</td>\n",
" <td>855</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>Hayes</td>\n",
" <td>832</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>Garfield</td>\n",
" <td>1022</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17</th>\n",
" <td>Cleveland</td>\n",
" <td>1219</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18</th>\n",
" <td>McKinley</td>\n",
" <td>1682</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19</th>\n",
" <td>Roosevelt</td>\n",
" <td>1704</td>\n",
" </tr>\n",
" <tr>\n",
" <th>20</th>\n",
" <td>Taft</td>\n",
" <td>1439</td>\n",
" </tr>\n",
" <tr>\n",
" <th>21</th>\n",
" <td>Wilson</td>\n",
" <td>996</td>\n",
" </tr>\n",
" <tr>\n",
" <th>22</th>\n",
" <td>Harding</td>\n",
" <td>1170</td>\n",
" </tr>\n",
" <tr>\n",
" <th>23</th>\n",
" <td>Coolidge</td>\n",
" <td>1221</td>\n",
" </tr>\n",
" <tr>\n",
" <th>24</th>\n",
" <td>Hoover</td>\n",
" <td>1087</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25</th>\n",
" <td>Truman</td>\n",
" <td>781</td>\n",
" </tr>\n",
" <tr>\n",
" <th>26</th>\n",
" <td>Eisenhower</td>\n",
" <td>1250</td>\n",
" </tr>\n",
" <tr>\n",
" <th>27</th>\n",
" <td>Kennedy</td>\n",
" <td>570</td>\n",
" </tr>\n",
" <tr>\n",
" <th>28</th>\n",
" <td>Johnson</td>\n",
" <td>571</td>\n",
" </tr>\n",
" <tr>\n",
" <th>29</th>\n",
" <td>Nixon</td>\n",
" <td>1037</td>\n",
" </tr>\n",
" <tr>\n",
" <th>30</th>\n",
" <td>Carter</td>\n",
" <td>529</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31</th>\n",
" <td>Reagan</td>\n",
" <td>1453</td>\n",
" </tr>\n",
" <tr>\n",
" <th>32</th>\n",
" <td>Bush</td>\n",
" <td>1578</td>\n",
" </tr>\n",
" <tr>\n",
" <th>33</th>\n",
" <td>Clinton</td>\n",
" <td>1138</td>\n",
" </tr>\n",
" <tr>\n",
" <th>34</th>\n",
" <td>Obama</td>\n",
" <td>938</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Presidents words count\n",
"1 Washington 663\n",
"2 Adams 1503\n",
"3 Jefferson 1261\n",
"4 Madison 899\n",
"5 Monroe 1781\n",
"6 Jackson 846\n",
"7 VanBuren 1318\n",
"8 Harrison 2662\n",
"9 Polk 1333\n",
"10 Taylor 499\n",
"11 Pierce 1168\n",
"12 Buchanan 948\n",
"13 Lincoln 1261\n",
"14 Grant 855\n",
"15 Hayes 832\n",
"16 Garfield 1022\n",
"17 Cleveland 1219\n",
"18 McKinley 1682\n",
"19 Roosevelt 1704\n",
"20 Taft 1439\n",
"21 Wilson 996\n",
"22 Harding 1170\n",
"23 Coolidge 1221\n",
"24 Hoover 1087\n",
"25 Truman 781\n",
"26 Eisenhower 1250\n",
"27 Kennedy 570\n",
"28 Johnson 571\n",
"29 Nixon 1037\n",
"30 Carter 529\n",
"31 Reagan 1453\n",
"32 Bush 1578\n",
"33 Clinton 1138\n",
"34 Obama 938"
]
},
"execution_count": 158,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"anaconda-cloud": {},
"kernelspec": {
"display_name": "Python [conda root]",
"language": "python",
"name": "conda-root-py"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.2"
}
},
"nbformat": 4,
"nbformat_minor": 0
}

You might also like