Download as pdf or txt
Download as pdf or txt
You are on page 1of 50

downloading MNIST dataset

from sklearn.datasets import fetch_openml

mnist= fetch_openml('mnist_784')

C:\Users\sindu\anaconda3\Lib\site-packages\sklearn\datasets\
_openml.py:1002: FutureWarning: The default value of `parser` will
change from `'liac-arff'` to `'auto'` in 1.4. You can set
`parser='auto'` to silence this warning. Therefore, an `ImportError`
will be raised from 1.4 if the dataset is dense and pandas is not
installed. Note that the pandas parser may return different data
types. See the Notes Section in fetch_openml's API doc for details.
warn(

mnist

{'data': pixel1 pixel2 pixel3 pixel4 pixel5 pixel6 pixel7


pixel8 pixel9 \
0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
0.0
1 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
0.0
2 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
0.0
3 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
0.0
4 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
0.0
... ... ... ... ... ... ... ... ...
...
69995 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
0.0
69996 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
0.0
69997 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
0.0
69998 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
0.0
69999 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
0.0

pixel10 ... pixel775 pixel776 pixel777 pixel778 pixel779


\
0 0.0 ... 0.0 0.0 0.0 0.0 0.0

1 0.0 ... 0.0 0.0 0.0 0.0 0.0


2 0.0 ... 0.0 0.0 0.0 0.0 0.0

3 0.0 ... 0.0 0.0 0.0 0.0 0.0

4 0.0 ... 0.0 0.0 0.0 0.0 0.0

... ... ... ... ... ... ... ...

69995 0.0 ... 0.0 0.0 0.0 0.0 0.0

69996 0.0 ... 0.0 0.0 0.0 0.0 0.0

69997 0.0 ... 0.0 0.0 0.0 0.0 0.0

69998 0.0 ... 0.0 0.0 0.0 0.0 0.0

69999 0.0 ... 0.0 0.0 0.0 0.0 0.0

pixel780 pixel781 pixel782 pixel783 pixel784


0 0.0 0.0 0.0 0.0 0.0
1 0.0 0.0 0.0 0.0 0.0
2 0.0 0.0 0.0 0.0 0.0
3 0.0 0.0 0.0 0.0 0.0
4 0.0 0.0 0.0 0.0 0.0
... ... ... ... ... ...
69995 0.0 0.0 0.0 0.0 0.0
69996 0.0 0.0 0.0 0.0 0.0
69997 0.0 0.0 0.0 0.0 0.0
69998 0.0 0.0 0.0 0.0 0.0
69999 0.0 0.0 0.0 0.0 0.0

[70000 rows x 784 columns],


'target': 0 5
1 0
2 4
3 1
4 9
..
69995 2
69996 3
69997 4
69998 5
69999 6
Name: class, Length: 70000, dtype: category
Categories (10, object): ['0', '1', '2', '3', ..., '6', '7', '8',
'9'],
'frame': pixel1 pixel2 pixel3 pixel4 pixel5 pixel6
pixel7 pixel8 pixel9 \
0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
0.0
1 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
0.0
2 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
0.0
3 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
0.0
4 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
0.0
... ... ... ... ... ... ... ... ...
...
69995 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
0.0
69996 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
0.0
69997 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
0.0
69998 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
0.0
69999 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
0.0

pixel10 ... pixel776 pixel777 pixel778 pixel779 pixel780


\
0 0.0 ... 0.0 0.0 0.0 0.0 0.0

1 0.0 ... 0.0 0.0 0.0 0.0 0.0

2 0.0 ... 0.0 0.0 0.0 0.0 0.0

3 0.0 ... 0.0 0.0 0.0 0.0 0.0

4 0.0 ... 0.0 0.0 0.0 0.0 0.0

... ... ... ... ... ... ... ...

69995 0.0 ... 0.0 0.0 0.0 0.0 0.0

69996 0.0 ... 0.0 0.0 0.0 0.0 0.0

69997 0.0 ... 0.0 0.0 0.0 0.0 0.0

69998 0.0 ... 0.0 0.0 0.0 0.0 0.0

69999 0.0 ... 0.0 0.0 0.0 0.0 0.0

pixel781 pixel782 pixel783 pixel784 class


0 0.0 0.0 0.0 0.0 5
1 0.0 0.0 0.0 0.0 0
2 0.0 0.0 0.0 0.0 4
3 0.0 0.0 0.0 0.0 1
4 0.0 0.0 0.0 0.0 9
... ... ... ... ... ...
69995 0.0 0.0 0.0 0.0 2
69996 0.0 0.0 0.0 0.0 3
69997 0.0 0.0 0.0 0.0 4
69998 0.0 0.0 0.0 0.0 5
69999 0.0 0.0 0.0 0.0 6

[70000 rows x 785 columns],


'categories': None,
'feature_names': ['pixel1',
'pixel2',
'pixel3',
'pixel4',
'pixel5',
'pixel6',
'pixel7',
'pixel8',
'pixel9',
'pixel10',
'pixel11',
'pixel12',
'pixel13',
'pixel14',
'pixel15',
'pixel16',
'pixel17',
'pixel18',
'pixel19',
'pixel20',
'pixel21',
'pixel22',
'pixel23',
'pixel24',
'pixel25',
'pixel26',
'pixel27',
'pixel28',
'pixel29',
'pixel30',
'pixel31',
'pixel32',
'pixel33',
'pixel34',
'pixel35',
'pixel36',
'pixel37',
'pixel38',
'pixel39',
'pixel40',
'pixel41',
'pixel42',
'pixel43',
'pixel44',
'pixel45',
'pixel46',
'pixel47',
'pixel48',
'pixel49',
'pixel50',
'pixel51',
'pixel52',
'pixel53',
'pixel54',
'pixel55',
'pixel56',
'pixel57',
'pixel58',
'pixel59',
'pixel60',
'pixel61',
'pixel62',
'pixel63',
'pixel64',
'pixel65',
'pixel66',
'pixel67',
'pixel68',
'pixel69',
'pixel70',
'pixel71',
'pixel72',
'pixel73',
'pixel74',
'pixel75',
'pixel76',
'pixel77',
'pixel78',
'pixel79',
'pixel80',
'pixel81',
'pixel82',
'pixel83',
'pixel84',
'pixel85',
'pixel86',
'pixel87',
'pixel88',
'pixel89',
'pixel90',
'pixel91',
'pixel92',
'pixel93',
'pixel94',
'pixel95',
'pixel96',
'pixel97',
'pixel98',
'pixel99',
'pixel100',
'pixel101',
'pixel102',
'pixel103',
'pixel104',
'pixel105',
'pixel106',
'pixel107',
'pixel108',
'pixel109',
'pixel110',
'pixel111',
'pixel112',
'pixel113',
'pixel114',
'pixel115',
'pixel116',
'pixel117',
'pixel118',
'pixel119',
'pixel120',
'pixel121',
'pixel122',
'pixel123',
'pixel124',
'pixel125',
'pixel126',
'pixel127',
'pixel128',
'pixel129',
'pixel130',
'pixel131',
'pixel132',
'pixel133',
'pixel134',
'pixel135',
'pixel136',
'pixel137',
'pixel138',
'pixel139',
'pixel140',
'pixel141',
'pixel142',
'pixel143',
'pixel144',
'pixel145',
'pixel146',
'pixel147',
'pixel148',
'pixel149',
'pixel150',
'pixel151',
'pixel152',
'pixel153',
'pixel154',
'pixel155',
'pixel156',
'pixel157',
'pixel158',
'pixel159',
'pixel160',
'pixel161',
'pixel162',
'pixel163',
'pixel164',
'pixel165',
'pixel166',
'pixel167',
'pixel168',
'pixel169',
'pixel170',
'pixel171',
'pixel172',
'pixel173',
'pixel174',
'pixel175',
'pixel176',
'pixel177',
'pixel178',
'pixel179',
'pixel180',
'pixel181',
'pixel182',
'pixel183',
'pixel184',
'pixel185',
'pixel186',
'pixel187',
'pixel188',
'pixel189',
'pixel190',
'pixel191',
'pixel192',
'pixel193',
'pixel194',
'pixel195',
'pixel196',
'pixel197',
'pixel198',
'pixel199',
'pixel200',
'pixel201',
'pixel202',
'pixel203',
'pixel204',
'pixel205',
'pixel206',
'pixel207',
'pixel208',
'pixel209',
'pixel210',
'pixel211',
'pixel212',
'pixel213',
'pixel214',
'pixel215',
'pixel216',
'pixel217',
'pixel218',
'pixel219',
'pixel220',
'pixel221',
'pixel222',
'pixel223',
'pixel224',
'pixel225',
'pixel226',
'pixel227',
'pixel228',
'pixel229',
'pixel230',
'pixel231',
'pixel232',
'pixel233',
'pixel234',
'pixel235',
'pixel236',
'pixel237',
'pixel238',
'pixel239',
'pixel240',
'pixel241',
'pixel242',
'pixel243',
'pixel244',
'pixel245',
'pixel246',
'pixel247',
'pixel248',
'pixel249',
'pixel250',
'pixel251',
'pixel252',
'pixel253',
'pixel254',
'pixel255',
'pixel256',
'pixel257',
'pixel258',
'pixel259',
'pixel260',
'pixel261',
'pixel262',
'pixel263',
'pixel264',
'pixel265',
'pixel266',
'pixel267',
'pixel268',
'pixel269',
'pixel270',
'pixel271',
'pixel272',
'pixel273',
'pixel274',
'pixel275',
'pixel276',
'pixel277',
'pixel278',
'pixel279',
'pixel280',
'pixel281',
'pixel282',
'pixel283',
'pixel284',
'pixel285',
'pixel286',
'pixel287',
'pixel288',
'pixel289',
'pixel290',
'pixel291',
'pixel292',
'pixel293',
'pixel294',
'pixel295',
'pixel296',
'pixel297',
'pixel298',
'pixel299',
'pixel300',
'pixel301',
'pixel302',
'pixel303',
'pixel304',
'pixel305',
'pixel306',
'pixel307',
'pixel308',
'pixel309',
'pixel310',
'pixel311',
'pixel312',
'pixel313',
'pixel314',
'pixel315',
'pixel316',
'pixel317',
'pixel318',
'pixel319',
'pixel320',
'pixel321',
'pixel322',
'pixel323',
'pixel324',
'pixel325',
'pixel326',
'pixel327',
'pixel328',
'pixel329',
'pixel330',
'pixel331',
'pixel332',
'pixel333',
'pixel334',
'pixel335',
'pixel336',
'pixel337',
'pixel338',
'pixel339',
'pixel340',
'pixel341',
'pixel342',
'pixel343',
'pixel344',
'pixel345',
'pixel346',
'pixel347',
'pixel348',
'pixel349',
'pixel350',
'pixel351',
'pixel352',
'pixel353',
'pixel354',
'pixel355',
'pixel356',
'pixel357',
'pixel358',
'pixel359',
'pixel360',
'pixel361',
'pixel362',
'pixel363',
'pixel364',
'pixel365',
'pixel366',
'pixel367',
'pixel368',
'pixel369',
'pixel370',
'pixel371',
'pixel372',
'pixel373',
'pixel374',
'pixel375',
'pixel376',
'pixel377',
'pixel378',
'pixel379',
'pixel380',
'pixel381',
'pixel382',
'pixel383',
'pixel384',
'pixel385',
'pixel386',
'pixel387',
'pixel388',
'pixel389',
'pixel390',
'pixel391',
'pixel392',
'pixel393',
'pixel394',
'pixel395',
'pixel396',
'pixel397',
'pixel398',
'pixel399',
'pixel400',
'pixel401',
'pixel402',
'pixel403',
'pixel404',
'pixel405',
'pixel406',
'pixel407',
'pixel408',
'pixel409',
'pixel410',
'pixel411',
'pixel412',
'pixel413',
'pixel414',
'pixel415',
'pixel416',
'pixel417',
'pixel418',
'pixel419',
'pixel420',
'pixel421',
'pixel422',
'pixel423',
'pixel424',
'pixel425',
'pixel426',
'pixel427',
'pixel428',
'pixel429',
'pixel430',
'pixel431',
'pixel432',
'pixel433',
'pixel434',
'pixel435',
'pixel436',
'pixel437',
'pixel438',
'pixel439',
'pixel440',
'pixel441',
'pixel442',
'pixel443',
'pixel444',
'pixel445',
'pixel446',
'pixel447',
'pixel448',
'pixel449',
'pixel450',
'pixel451',
'pixel452',
'pixel453',
'pixel454',
'pixel455',
'pixel456',
'pixel457',
'pixel458',
'pixel459',
'pixel460',
'pixel461',
'pixel462',
'pixel463',
'pixel464',
'pixel465',
'pixel466',
'pixel467',
'pixel468',
'pixel469',
'pixel470',
'pixel471',
'pixel472',
'pixel473',
'pixel474',
'pixel475',
'pixel476',
'pixel477',
'pixel478',
'pixel479',
'pixel480',
'pixel481',
'pixel482',
'pixel483',
'pixel484',
'pixel485',
'pixel486',
'pixel487',
'pixel488',
'pixel489',
'pixel490',
'pixel491',
'pixel492',
'pixel493',
'pixel494',
'pixel495',
'pixel496',
'pixel497',
'pixel498',
'pixel499',
'pixel500',
'pixel501',
'pixel502',
'pixel503',
'pixel504',
'pixel505',
'pixel506',
'pixel507',
'pixel508',
'pixel509',
'pixel510',
'pixel511',
'pixel512',
'pixel513',
'pixel514',
'pixel515',
'pixel516',
'pixel517',
'pixel518',
'pixel519',
'pixel520',
'pixel521',
'pixel522',
'pixel523',
'pixel524',
'pixel525',
'pixel526',
'pixel527',
'pixel528',
'pixel529',
'pixel530',
'pixel531',
'pixel532',
'pixel533',
'pixel534',
'pixel535',
'pixel536',
'pixel537',
'pixel538',
'pixel539',
'pixel540',
'pixel541',
'pixel542',
'pixel543',
'pixel544',
'pixel545',
'pixel546',
'pixel547',
'pixel548',
'pixel549',
'pixel550',
'pixel551',
'pixel552',
'pixel553',
'pixel554',
'pixel555',
'pixel556',
'pixel557',
'pixel558',
'pixel559',
'pixel560',
'pixel561',
'pixel562',
'pixel563',
'pixel564',
'pixel565',
'pixel566',
'pixel567',
'pixel568',
'pixel569',
'pixel570',
'pixel571',
'pixel572',
'pixel573',
'pixel574',
'pixel575',
'pixel576',
'pixel577',
'pixel578',
'pixel579',
'pixel580',
'pixel581',
'pixel582',
'pixel583',
'pixel584',
'pixel585',
'pixel586',
'pixel587',
'pixel588',
'pixel589',
'pixel590',
'pixel591',
'pixel592',
'pixel593',
'pixel594',
'pixel595',
'pixel596',
'pixel597',
'pixel598',
'pixel599',
'pixel600',
'pixel601',
'pixel602',
'pixel603',
'pixel604',
'pixel605',
'pixel606',
'pixel607',
'pixel608',
'pixel609',
'pixel610',
'pixel611',
'pixel612',
'pixel613',
'pixel614',
'pixel615',
'pixel616',
'pixel617',
'pixel618',
'pixel619',
'pixel620',
'pixel621',
'pixel622',
'pixel623',
'pixel624',
'pixel625',
'pixel626',
'pixel627',
'pixel628',
'pixel629',
'pixel630',
'pixel631',
'pixel632',
'pixel633',
'pixel634',
'pixel635',
'pixel636',
'pixel637',
'pixel638',
'pixel639',
'pixel640',
'pixel641',
'pixel642',
'pixel643',
'pixel644',
'pixel645',
'pixel646',
'pixel647',
'pixel648',
'pixel649',
'pixel650',
'pixel651',
'pixel652',
'pixel653',
'pixel654',
'pixel655',
'pixel656',
'pixel657',
'pixel658',
'pixel659',
'pixel660',
'pixel661',
'pixel662',
'pixel663',
'pixel664',
'pixel665',
'pixel666',
'pixel667',
'pixel668',
'pixel669',
'pixel670',
'pixel671',
'pixel672',
'pixel673',
'pixel674',
'pixel675',
'pixel676',
'pixel677',
'pixel678',
'pixel679',
'pixel680',
'pixel681',
'pixel682',
'pixel683',
'pixel684',
'pixel685',
'pixel686',
'pixel687',
'pixel688',
'pixel689',
'pixel690',
'pixel691',
'pixel692',
'pixel693',
'pixel694',
'pixel695',
'pixel696',
'pixel697',
'pixel698',
'pixel699',
'pixel700',
'pixel701',
'pixel702',
'pixel703',
'pixel704',
'pixel705',
'pixel706',
'pixel707',
'pixel708',
'pixel709',
'pixel710',
'pixel711',
'pixel712',
'pixel713',
'pixel714',
'pixel715',
'pixel716',
'pixel717',
'pixel718',
'pixel719',
'pixel720',
'pixel721',
'pixel722',
'pixel723',
'pixel724',
'pixel725',
'pixel726',
'pixel727',
'pixel728',
'pixel729',
'pixel730',
'pixel731',
'pixel732',
'pixel733',
'pixel734',
'pixel735',
'pixel736',
'pixel737',
'pixel738',
'pixel739',
'pixel740',
'pixel741',
'pixel742',
'pixel743',
'pixel744',
'pixel745',
'pixel746',
'pixel747',
'pixel748',
'pixel749',
'pixel750',
'pixel751',
'pixel752',
'pixel753',
'pixel754',
'pixel755',
'pixel756',
'pixel757',
'pixel758',
'pixel759',
'pixel760',
'pixel761',
'pixel762',
'pixel763',
'pixel764',
'pixel765',
'pixel766',
'pixel767',
'pixel768',
'pixel769',
'pixel770',
'pixel771',
'pixel772',
'pixel773',
'pixel774',
'pixel775',
'pixel776',
'pixel777',
'pixel778',
'pixel779',
'pixel780',
'pixel781',
'pixel782',
'pixel783',
'pixel784'],
'target_names': ['class'],
'DESCR': "**Author**: Yann LeCun, Corinna Cortes, Christopher J.C.
Burges \n**Source**: [MNIST
Website](http://yann.lecun.com/exdb/mnist/) - Date unknown \n**Please
cite**: \n\nThe MNIST database of handwritten digits with 784
features, raw data available at: http://yann.lecun.com/exdb/mnist/. It
can be split in a training set of the first 60,000 examples, and a
test set of 10,000 examples \n\nIt is a subset of a larger set
available from NIST. The digits have been size-normalized and centered
in a fixed-size image. It is a good database for people who want to
try learning techniques and pattern recognition methods on real-world
data while spending minimal efforts on preprocessing and formatting.
The original black and white (bilevel) images from NIST were size
normalized to fit in a 20x20 pixel box while preserving their aspect
ratio. The resulting images contain grey levels as a result of the
anti-aliasing technique used by the normalization algorithm. the
images were centered in a 28x28 image by computing the center of mass
of the pixels, and translating the image so as to position this point
at the center of the 28x28 field. \n\nWith some classification
methods (particularly template-based methods, such as SVM and K-
nearest neighbors), the error rate improves when the digits are
centered by bounding box rather than center of mass. If you do this
kind of pre-processing, you should report it in your publications. The
MNIST database was constructed from NIST's NIST originally designated
SD-3 as their training set and SD-1 as their test set. However, SD-3
is much cleaner and easier to recognize than SD-1. The reason for this
can be found on the fact that SD-3 was collected among Census Bureau
employees, while SD-1 was collected among high-school students.
Drawing sensible conclusions from learning experiments requires that
the result be independent of the choice of training set and test among
the complete set of samples. Therefore it was necessary to build a new
database by mixing NIST's datasets. \n\nThe MNIST training set is
composed of 30,000 patterns from SD-3 and 30,000 patterns from SD-1.
Our test set was composed of 5,000 patterns from SD-3 and 5,000
patterns from SD-1. The 60,000 pattern training set contained examples
from approximately 250 writers. We made sure that the sets of writers
of the training set and test set were disjoint. SD-1 contains 58,527
digit images written by 500 different writers. In contrast to SD-3,
where blocks of data from each writer appeared in sequence, the data
in SD-1 is scrambled. Writer identities for SD-1 is available and we
used this information to unscramble the writers. We then split SD-1 in
two: characters written by the first 250 writers went into our new
training set. The remaining 250 writers were placed in our test set.
Thus we had two sets with nearly 30,000 examples each. The new
training set was completed with enough examples from SD-3, starting at
pattern # 0, to make a full set of 60,000 training patterns.
Similarly, the new test set was completed with SD-3 examples starting
at pattern # 35,000 to make a full set with 60,000 test patterns. Only
a subset of 10,000 test images (5,000 from SD-1 and 5,000 from SD-3)
is available on this site. The full 60,000 sample training set is
available.\n\nDownloaded from openml.org.",
'details': {'id': '554',
'name': 'mnist_784',
'version': '1',
'description_version': '2',
'format': 'ARFF',
'creator': ['Yann LeCun', 'Corinna Cortes', 'Christopher J.C.
Burges'],
'upload_date': '2014-09-29T03:28:38',
'language': 'English',
'licence': 'Public',
'url':
'https://api.openml.org/data/v1/download/52667/mnist_784.arff',
'parquet_url':
'https://openml1.win.tue.nl/datasets/0000/0554/dataset_554.pq',
'file_id': '52667',
'default_target_attribute': 'class',
'tag': ['AzurePilot',
'OpenML-CC18',
'OpenML100',
'study_1',
'study_123',
'study_41',
'study_99',
'vision'],
'visibility': 'public',
'minio_url':
'https://openml1.win.tue.nl/datasets/0000/0554/dataset_554.pq',
'status': 'active',
'processing_date': '2020-11-20 20:12:09',
'md5_checksum': '0298d579eb1b86163de7723944c7e495'},
'url': 'https://www.openml.org/d/554'}

x,y=mnist['data'],mnist['target']

x
pixel1 pixel2 pixel3 pixel4 pixel5 pixel6 pixel7 pixel8
pixel9 \
0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
0.0
1 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
0.0
2 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
0.0
3 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
0.0
4 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
0.0
... ... ... ... ... ... ... ... ...
...
69995 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
0.0
69996 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
0.0
69997 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
0.0
69998 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
0.0
69999 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
0.0

pixel10 ... pixel775 pixel776 pixel777 pixel778 pixel779


\
0 0.0 ... 0.0 0.0 0.0 0.0 0.0

1 0.0 ... 0.0 0.0 0.0 0.0 0.0

2 0.0 ... 0.0 0.0 0.0 0.0 0.0

3 0.0 ... 0.0 0.0 0.0 0.0 0.0

4 0.0 ... 0.0 0.0 0.0 0.0 0.0

... ... ... ... ... ... ... ...

69995 0.0 ... 0.0 0.0 0.0 0.0 0.0

69996 0.0 ... 0.0 0.0 0.0 0.0 0.0

69997 0.0 ... 0.0 0.0 0.0 0.0 0.0

69998 0.0 ... 0.0 0.0 0.0 0.0 0.0

69999 0.0 ... 0.0 0.0 0.0 0.0 0.0

pixel780 pixel781 pixel782 pixel783 pixel784


0 0.0 0.0 0.0 0.0 0.0
1 0.0 0.0 0.0 0.0 0.0
2 0.0 0.0 0.0 0.0 0.0
3 0.0 0.0 0.0 0.0 0.0
4 0.0 0.0 0.0 0.0 0.0
... ... ... ... ... ...
69995 0.0 0.0 0.0 0.0 0.0
69996 0.0 0.0 0.0 0.0 0.0
69997 0.0 0.0 0.0 0.0 0.0
69998 0.0 0.0 0.0 0.0 0.0
69999 0.0 0.0 0.0 0.0 0.0

[70000 rows x 784 columns]

0 5
1 0
2 4
3 1
4 9
..
69995 2
69996 3
69997 4
69998 5
69999 6
Name: class, Length: 70000, dtype: category
Categories (10, object): ['0', '1', '2', '3', ..., '6', '7', '8', '9']

y[36001]

'2'

y[1000]

'0'

x_train,x_test,y_train,y_test=x[:60000],x[60000:],y[:60000],y[60000:]

x_train

pixel1 pixel2 pixel3 pixel4 pixel5 pixel6 pixel7 pixel8


pixel9 \
0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
0.0
1 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
0.0
2 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
0.0
3 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
0.0
4 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
0.0
... ... ... ... ... ... ... ... ...
...
59995 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
0.0
59996 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
0.0
59997 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
0.0
59998 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
0.0
59999 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
0.0

pixel10 ... pixel775 pixel776 pixel777 pixel778 pixel779


\
0 0.0 ... 0.0 0.0 0.0 0.0 0.0

1 0.0 ... 0.0 0.0 0.0 0.0 0.0

2 0.0 ... 0.0 0.0 0.0 0.0 0.0

3 0.0 ... 0.0 0.0 0.0 0.0 0.0

4 0.0 ... 0.0 0.0 0.0 0.0 0.0

... ... ... ... ... ... ... ...

59995 0.0 ... 0.0 0.0 0.0 0.0 0.0

59996 0.0 ... 0.0 0.0 0.0 0.0 0.0

59997 0.0 ... 0.0 0.0 0.0 0.0 0.0

59998 0.0 ... 0.0 0.0 0.0 0.0 0.0

59999 0.0 ... 0.0 0.0 0.0 0.0 0.0

pixel780 pixel781 pixel782 pixel783 pixel784


0 0.0 0.0 0.0 0.0 0.0
1 0.0 0.0 0.0 0.0 0.0
2 0.0 0.0 0.0 0.0 0.0
3 0.0 0.0 0.0 0.0 0.0
4 0.0 0.0 0.0 0.0 0.0
... ... ... ... ... ...
59995 0.0 0.0 0.0 0.0 0.0
59996 0.0 0.0 0.0 0.0 0.0
59997 0.0 0.0 0.0 0.0 0.0
59998 0.0 0.0 0.0 0.0 0.0
59999 0.0 0.0 0.0 0.0 0.0

[60000 rows x 784 columns]

y_test

60000 7
60001 2
60002 1
60003 0
60004 4
..
69995 2
69996 3
69997 4
69998 5
69999 6
Name: class, Length: 10000, dtype: category
Categories (10, object): ['0', '1', '2', '3', ..., '6', '7', '8', '9']

y_train

0 5
1 0
2 4
3 1
4 9
..
59995 8
59996 3
59997 5
59998 6
59999 8
Name: class, Length: 60000, dtype: category
Categories (10, object): ['0', '1', '2', '3', ..., '6', '7', '8', '9']

x_test

pixel1 pixel2 pixel3 pixel4 pixel5 pixel6 pixel7 pixel8


pixel9 \
60000 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
0.0
60001 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
0.0
60002 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
0.0
60003 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
0.0
60004 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
0.0
... ... ... ... ... ... ... ... ...
...
69995 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
0.0
69996 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
0.0
69997 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
0.0
69998 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
0.0
69999 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
0.0

pixel10 ... pixel775 pixel776 pixel777 pixel778 pixel779


\
60000 0.0 ... 0.0 0.0 0.0 0.0 0.0

60001 0.0 ... 0.0 0.0 0.0 0.0 0.0

60002 0.0 ... 0.0 0.0 0.0 0.0 0.0

60003 0.0 ... 0.0 0.0 0.0 0.0 0.0

60004 0.0 ... 0.0 0.0 0.0 0.0 0.0

... ... ... ... ... ... ... ...

69995 0.0 ... 0.0 0.0 0.0 0.0 0.0

69996 0.0 ... 0.0 0.0 0.0 0.0 0.0

69997 0.0 ... 0.0 0.0 0.0 0.0 0.0

69998 0.0 ... 0.0 0.0 0.0 0.0 0.0

69999 0.0 ... 0.0 0.0 0.0 0.0 0.0

pixel780 pixel781 pixel782 pixel783 pixel784


60000 0.0 0.0 0.0 0.0 0.0
60001 0.0 0.0 0.0 0.0 0.0
60002 0.0 0.0 0.0 0.0 0.0
60003 0.0 0.0 0.0 0.0 0.0
60004 0.0 0.0 0.0 0.0 0.0
... ... ... ... ... ...
69995 0.0 0.0 0.0 0.0 0.0
69996 0.0 0.0 0.0 0.0 0.0
69997 0.0 0.0 0.0 0.0 0.0
69998 0.0 0.0 0.0 0.0 0.0
69999 0.0 0.0 0.0 0.0 0.0

[10000 rows x 784 columns]

CREATING A 2 DETECTOR
import numpy as np
y_train=y_train.astype(np.int8)

y_test=y_test.astype(np.int8)

y_train2=(y_train==2)

y_test2=(y_test==2)

y_train

0 5
1 0
2 4
3 1
4 9
..
59995 8
59996 3
59997 5
59998 6
59999 8
Name: class, Length: 60000, dtype: int8

performance Measure
from sklearn.linear_model import LogisticRegression

LGR=LogisticRegression(tol=0.1,solver='lbfgs')

LGR.fit(x_train,y_train2)

C:\Users\sindu\anaconda3\Lib\site-packages\sklearn\linear_model\
_logistic.py:460: ConvergenceWarning: lbfgs failed to converge
(status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as


shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
https://scikit-learn.org/stable/modules/linear_model.html#logistic-
regression
n_iter_i = _check_optimize_result(

LogisticRegression(tol=0.1)

pred2=LGR.predict(x_test)

pred2

array([False, True, False, ..., False, False, False])

from sklearn.model_selection import cross_val_score


cross_val_score(LGR, x_train, y_train2, cv=3, scoring="accuracy")

C:\Users\sindu\anaconda3\Lib\site-packages\sklearn\linear_model\
_logistic.py:460: ConvergenceWarning: lbfgs failed to converge
(status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as


shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:

https://scikit-learn.org/stable/modules/linear_model.html#logistic-
regression
n_iter_i = _check_optimize_result(
C:\Users\sindu\anaconda3\Lib\site-packages\sklearn\linear_model\
_logistic.py:460: ConvergenceWarning: lbfgs failed to converge
(status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as


shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:

https://scikit-learn.org/stable/modules/linear_model.html#logistic-
regression
n_iter_i = _check_optimize_result(
C:\Users\sindu\anaconda3\Lib\site-packages\sklearn\linear_model\
_logistic.py:460: ConvergenceWarning: lbfgs failed to converge
(status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as


shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
https://scikit-learn.org/stable/modules/linear_model.html#logistic-
regression
n_iter_i = _check_optimize_result(

array([0.9769 , 0.97725, 0.98015])

from sklearn.metrics import confusion_matrix,accuracy_score


cm2=confusion_matrix(pred2,y_test)
a2=accuracy_score(pred2,y_test)

cm2

array([[ 976, 1132, 145, 997, 978, 892, 951, 1011, 969, 1009],
[ 4, 3, 887, 13, 4, 0, 7, 17, 5, 0],
[ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]],
dtype=int64)

a2

0.0979

Performance Measure
from sklearn.model_selection import cross_val_predict

y_train_predict = cross_val_predict(LGR,x_train,y_train2,cv=3)

C:\Users\sindu\anaconda3\Lib\site-packages\sklearn\linear_model\
_logistic.py:460: ConvergenceWarning: lbfgs failed to converge
(status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as


shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:

https://scikit-learn.org/stable/modules/linear_model.html#logistic-
regression
n_iter_i = _check_optimize_result(
C:\Users\sindu\anaconda3\Lib\site-packages\sklearn\linear_model\
_logistic.py:460: ConvergenceWarning: lbfgs failed to converge
(status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as


shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:

https://scikit-learn.org/stable/modules/linear_model.html#logistic-
regression
n_iter_i = _check_optimize_result(
C:\Users\sindu\anaconda3\Lib\site-packages\sklearn\linear_model\
_logistic.py:460: ConvergenceWarning: lbfgs failed to converge
(status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as


shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:

https://scikit-learn.org/stable/modules/linear_model.html#logistic-
regression
n_iter_i = _check_optimize_result(

y_train_predict

array([False, False, False, ..., False, False, False])

y_train_predict.mean()

0.09326666666666666

Confusion matrix
from sklearn.metrics import confusion_matrix

confusion_matrix(y_train2,y_train_predict)

array([[53566, 476],
[ 838, 5120]], dtype=int64)

y_train_perfect_predictions=y_train2
confusion_matrix(y_train2,y_train_perfect_predictions)

array([[54042, 0],
[ 0, 5958]], dtype=int64)

from sklearn.metrics import precision_score, recall_score


precision_score(y_train2,y_train_predict)

0.9149392423159399

recall_score(y_train2,y_train_predict)

0.8593487747566297

from sklearn.metrics import f1_score

f1_score(y_train2,y_train_predict)

0.8862731521550978

plotting the precision recall curve


from sklearn.metrics import precision_recall_curve

y_scores=cross_val_predict(LGR,x_train,y_train2,cv=3,method='decision_
function')

C:\Users\sindu\anaconda3\Lib\site-packages\sklearn\linear_model\
_logistic.py:460: ConvergenceWarning: lbfgs failed to converge
(status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as


shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:

https://scikit-learn.org/stable/modules/linear_model.html#logistic-
regression
n_iter_i = _check_optimize_result(
C:\Users\sindu\anaconda3\Lib\site-packages\sklearn\linear_model\
_logistic.py:460: ConvergenceWarning: lbfgs failed to converge
(status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as


shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:

https://scikit-learn.org/stable/modules/linear_model.html#logistic-
regression
n_iter_i = _check_optimize_result(
C:\Users\sindu\anaconda3\Lib\site-packages\sklearn\linear_model\
_logistic.py:460: ConvergenceWarning: lbfgs failed to converge
(status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as


shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:

https://scikit-learn.org/stable/modules/linear_model.html#logistic-
regression
n_iter_i = _check_optimize_result(

y_scores

array([-10.79425181, -8.66111669, -7.0448617 , ..., -12.75771503,


-8.09752241, -10.6575904 ])

precisions,recalls,thresholds=precision_recall_curve(y_train2,y_scores
)

precisions

array([0.0993 , 0.09930166, 0.09930331, ..., 1. ,


1. ,
1. ])

recalls

array([1.00000000e+00, 1.00000000e+00, 1.00000000e+00, ...,


3.35683115e-04, 1.67841558e-04, 0.00000000e+00])

thresholds

array([-54.53241804, -45.05086213, -44.86074066, ..., 18.36989685,


18.59160339, 19.14827613])

import matplotlib.pyplot as plt

plt.plot(thresholds,precisions[:-1],"b--",label="precisions")
plt.plot(thresholds,recalls[:-1],"g-",label="precisions")
plt.xlabel("thresholds")
plt.legend(loc="upper left")
plt.ylim([0,1])
plt.show()
data visualization
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
def plot_digit(data):
image = data.reshape(28, 28)
plt.imshow(image, cmap = mpl.cm.binary,
interpolation="nearest")
plt.axis("off")
# EXTRA
def plot_digits(instances, images_per_row=10, **options):
size = 28
images_per_row = min(len(instances), images_per_row)
# This is equivalent to n_rows = ceil(len(instances) /
images_per_row):
n_rows = (len(instances) - 1) // images_per_row + 1

# Append empty images to fill the end of the grid, if needed:


n_empty = n_rows * images_per_row - len(instances)
padded_instances = np.concatenate([instances, np.zeros((n_empty,
size * size))], axis=0)
# Reshape the array so it's organized as a grid containing 28×28
images:
image_grid = padded_instances.reshape((n_rows, images_per_row,
size, size))

# Combine axes 0 and 2 (vertical image grid axis, and vertical


image axis),
# and axes 1 and 3 (horizontal axes). We first need to move the
axes that we
# want to combine next to each other, using transpose(), and only
then we
# can reshape:
big_image = image_grid.transpose(0, 2, 1, 3).reshape(n_rows *
size,

images_per_row * size)
# Now that we have a big image, we just need to show it:
plt.imshow(big_image, cmap = mpl.cm.binary, **options)
plt.axis("off")
plt.figure(figsize=(9,9))
example_images = x[:100]
plot_digits(example_images, images_per_row=10)
plt.show()
mutliclass classification
from sklearn.svm import SVC
svm_clf=SVC(gamma="auto",random_state=42)
svm_clf.fit(x_train[:1000],y_train[:1000])
svm_clf.predict([y_test2[:784]])

C:\Users\sindu\anaconda3\Lib\site-packages\sklearn\base.py:464:
UserWarning: X does not have valid feature names, but SVC was fitted
with feature names
warnings.warn(

array([7], dtype=int8)

some_digit_score=svm_clf.decision_function([y_test2[:784]])
some_digit_score

C:\Users\sindu\anaconda3\Lib\site-packages\sklearn\base.py:464:
UserWarning: X does not have valid feature names, but SVC was fitted
with feature names
warnings.warn(

array([[ 3.93672053, 8.19324105, 4.98585718, 1.87180719, 7.1107916


,
0.85998001, 2.88509577, 9.19744483, -0.18355263,
6.01561605]])

np.argmax(some_digit_score)

svm_clf.classes_

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=int8)

svm_clf.classes_[5]

from sklearn.multiclass import OneVsRestClassifier


ovr_clf = OneVsRestClassifier(SVC(gamma="auto", random_state=42))
ovr_clf.fit(x_train[:1000], y_train[:1000])
ovr_clf.predict([y_test2[:784]])

C:\Users\sindu\anaconda3\Lib\site-packages\sklearn\base.py:464:
UserWarning: X does not have valid feature names, but SVC was fitted
with feature names
warnings.warn(
C:\Users\sindu\anaconda3\Lib\site-packages\sklearn\base.py:464:
UserWarning: X does not have valid feature names, but SVC was fitted
with feature names
warnings.warn(
C:\Users\sindu\anaconda3\Lib\site-packages\sklearn\base.py:464:
UserWarning: X does not have valid feature names, but SVC was fitted
with feature names
warnings.warn(
C:\Users\sindu\anaconda3\Lib\site-packages\sklearn\base.py:464:
UserWarning: X does not have valid feature names, but SVC was fitted
with feature names
warnings.warn(
C:\Users\sindu\anaconda3\Lib\site-packages\sklearn\base.py:464:
UserWarning: X does not have valid feature names, but SVC was fitted
with feature names
warnings.warn(
C:\Users\sindu\anaconda3\Lib\site-packages\sklearn\base.py:464:
UserWarning: X does not have valid feature names, but SVC was fitted
with feature names
warnings.warn(
C:\Users\sindu\anaconda3\Lib\site-packages\sklearn\base.py:464:
UserWarning: X does not have valid feature names, but SVC was fitted
with feature names
warnings.warn(
C:\Users\sindu\anaconda3\Lib\site-packages\sklearn\base.py:464:
UserWarning: X does not have valid feature names, but SVC was fitted
with feature names
warnings.warn(
C:\Users\sindu\anaconda3\Lib\site-packages\sklearn\base.py:464:
UserWarning: X does not have valid feature names, but SVC was fitted
with feature names
warnings.warn(
C:\Users\sindu\anaconda3\Lib\site-packages\sklearn\base.py:464:
UserWarning: X does not have valid feature names, but SVC was fitted
with feature names
warnings.warn(

array([7], dtype=int8)

len(ovr_clf.estimators_)

10

LGR.fit(x_train, y_train)
LGR.predict([y_test2[:784]])

C:\Users\sindu\anaconda3\Lib\site-packages\sklearn\linear_model\
_logistic.py:460: ConvergenceWarning: lbfgs failed to converge
(status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as


shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:

https://scikit-learn.org/stable/modules/linear_model.html#logistic-
regression
n_iter_i = _check_optimize_result(
C:\Users\sindu\anaconda3\Lib\site-packages\sklearn\base.py:464:
UserWarning: X does not have valid feature names, but
LogisticRegression was fitted with feature names
warnings.warn(
array([5], dtype=int8)

LGR.decision_function([y_test2[:784]])

C:\Users\sindu\anaconda3\Lib\site-packages\sklearn\base.py:464:
UserWarning: X does not have valid feature names, but
LogisticRegression was fitted with feature names
warnings.warn(

array([[ 0.00532607, -0.0163044 , 0.01284796, 0.00991781, -


0.02940118,
0.03604449, -0.00593248, 0.00276951, 0.00037665, -
0.01564443]])

cross_val_score(LGR, x_train, y_train, cv=3, scoring="accuracy")

C:\Users\sindu\anaconda3\Lib\site-packages\sklearn\linear_model\
_logistic.py:460: ConvergenceWarning: lbfgs failed to converge
(status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as


shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:

https://scikit-learn.org/stable/modules/linear_model.html#logistic-
regression
n_iter_i = _check_optimize_result(
C:\Users\sindu\anaconda3\Lib\site-packages\sklearn\linear_model\
_logistic.py:460: ConvergenceWarning: lbfgs failed to converge
(status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as


shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:

https://scikit-learn.org/stable/modules/linear_model.html#logistic-
regression
n_iter_i = _check_optimize_result(
C:\Users\sindu\anaconda3\Lib\site-packages\sklearn\linear_model\
_logistic.py:460: ConvergenceWarning: lbfgs failed to converge
(status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as


shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
https://scikit-learn.org/stable/modules/linear_model.html#logistic-
regression
n_iter_i = _check_optimize_result(

array([0.91275, 0.91565, 0.9171 ])

from sklearn.preprocessing import StandardScaler


scaler = StandardScaler()
x_train_scaled = scaler.fit_transform(x_train.astype(np.float64))
cross_val_score(LGR, x_train_scaled, y_train, cv=3,
scoring="accuracy")

C:\Users\sindu\anaconda3\Lib\site-packages\sklearn\linear_model\
_logistic.py:460: ConvergenceWarning: lbfgs failed to converge
(status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as


shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:

https://scikit-learn.org/stable/modules/linear_model.html#logistic-
regression
n_iter_i = _check_optimize_result(
C:\Users\sindu\anaconda3\Lib\site-packages\sklearn\linear_model\
_logistic.py:460: ConvergenceWarning: lbfgs failed to converge
(status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as


shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:

https://scikit-learn.org/stable/modules/linear_model.html#logistic-
regression
n_iter_i = _check_optimize_result(
C:\Users\sindu\anaconda3\Lib\site-packages\sklearn\linear_model\
_logistic.py:460: ConvergenceWarning: lbfgs failed to converge
(status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as


shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:

https://scikit-learn.org/stable/modules/linear_model.html#logistic-
regression
n_iter_i = _check_optimize_result(

array([0.90595, 0.9081 , 0.91005])

Error Analysis
y_train_predict = cross_val_predict(LGR, x_train_scaled, y_train,
cv=3)
conf_mx = confusion_matrix(y_train, y_train_predict)
conf_mx

C:\Users\sindu\anaconda3\Lib\site-packages\sklearn\linear_model\
_logistic.py:460: ConvergenceWarning: lbfgs failed to converge
(status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as


shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:

https://scikit-learn.org/stable/modules/linear_model.html#logistic-
regression
n_iter_i = _check_optimize_result(
C:\Users\sindu\anaconda3\Lib\site-packages\sklearn\linear_model\
_logistic.py:460: ConvergenceWarning: lbfgs failed to converge
(status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as


shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:

https://scikit-learn.org/stable/modules/linear_model.html#logistic-
regression
n_iter_i = _check_optimize_result(
C:\Users\sindu\anaconda3\Lib\site-packages\sklearn\linear_model\
_logistic.py:460: ConvergenceWarning: lbfgs failed to converge
(status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as


shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:

https://scikit-learn.org/stable/modules/linear_model.html#logistic-
regression
n_iter_i = _check_optimize_result(

array([[5652, 1, 45, 14, 13, 60, 57, 21, 49, 11],


[ 2, 6528, 42, 27, 12, 27, 7, 16, 71, 10],
[ 49, 83, 5234, 126, 79, 32, 100, 81, 146, 28],
[ 24, 37, 169, 5379, 11, 234, 22, 60, 128, 67],
[ 16, 26, 49, 14, 5344, 20, 66, 37, 42, 228],
[ 73, 26, 54, 195, 64, 4666, 112, 29, 142, 60],
[ 48, 17, 65, 7, 64, 95, 5579, 4, 37, 2],
[ 22, 23, 60, 38, 76, 17, 4, 5784, 26, 215],
[ 52, 140, 100, 163, 38, 185, 51, 24, 5015, 83],
[ 20, 23, 23, 84, 173, 51, 3, 214, 57, 5301]],
dtype=int64)

def plot_confusion_matrix(matrix):
"""If you prefer color and a colorbar"""
fig = plt.figure(figsize=(8,8))
ax = fig.add_subplot(111)
cax = ax.matshow(matrix)
fig.colorbar(cax)
plt.matshow(conf_mx, cmap=plt.cm.gray)
plt.show()
row_sums = conf_mx.sum(axis=1, keepdims=True)
norm_conf_mx = conf_mx / row_sums
np.fill_diagonal(norm_conf_mx, 0)
plt.matshow(norm_conf_mx, cmap=plt.cm.gray)
plt.show()

cl_a, cl_b = 2, 8
X_aa = x_train[(y_train == cl_a) & (y_train_predict == cl_a)]
X_ab = x_train[(y_train == cl_a) & (y_train_predict == cl_b)]
X_ba = x_train[(y_train == cl_b) & (y_train_predict == cl_a)]
X_bb = x_train[(y_train == cl_b) & (y_train_predict == cl_b)]

plt.figure(figsize=(8,8))
plt.subplot(221); plot_digits(X_aa[:25], images_per_row=5)
plt.subplot(222); plot_digits(X_ab[:25], images_per_row=5)
plt.subplot(223); plot_digits(X_ba[:25], images_per_row=5)
plt.subplot(224); plot_digits(X_bb[:25], images_per_row=5)
plt.show()
Multilabel Classification
from sklearn.neighbors import KNeighborsClassifier

y_train_large = (y_train >= 7)


y_train_odd = (y_train % 2 == 1)
y_multilabel = np.c_[y_train_large, y_train_odd]

knn_clf = KNeighborsClassifier()
knn_clf.fit(x_train, y_multilabel)
KNeighborsClassifier()
knn_clf.predict([y_test2[:784]])

C:\Users\sindu\anaconda3\Lib\site-packages\sklearn\base.py:464:
UserWarning: X does not have valid feature names, but
KNeighborsClassifier was fitted with feature names
warnings.warn(

array([[False, True]])

y_train_knn_pred = cross_val_predict(knn_clf, x_train, y_multilabel,


cv=3)
f1_score(y_multilabel, y_train_knn_pred, average="macro")

----------------------------------------------------------------------
-----
AttributeError Traceback (most recent call
last)
Cell In[66], line 1
----> 1 y_train_knn_pred = cross_val_predict(knn_clf, x_train,
y_multilabel, cv=3)
2 f1_score(y_multilabel, y_train_knn_pred, average="macro")

File ~\anaconda3\Lib\site-packages\sklearn\model_selection\
_validation.py:1036, in cross_val_predict(estimator, X, y, groups, cv,
n_jobs, verbose, fit_params, pre_dispatch, method)
1033 # We clone the estimator to make sure that all the folds are
1034 # independent, and that it is pickle-able.
1035 parallel = Parallel(n_jobs=n_jobs, verbose=verbose,
pre_dispatch=pre_dispatch)
-> 1036 predictions = parallel(
1037 delayed(_fit_and_predict)(
1038 clone(estimator), X, y, train, test, verbose,
fit_params, method
1039 )
1040 for train, test in splits
1041 )
1043 inv_test_indices = np.empty(len(test_indices), dtype=int)
1044 inv_test_indices[test_indices] = np.arange(len(test_indices))

File ~\anaconda3\Lib\site-packages\sklearn\utils\parallel.py:65, in
Parallel.__call__(self, iterable)
60 config = get_config()
61 iterable_with_config = (
62 (_with_config(delayed_func, config), args, kwargs)
63 for delayed_func, args, kwargs in iterable
64 )
---> 65 return super().__call__(iterable_with_config)

File ~\anaconda3\Lib\site-packages\joblib\parallel.py:1085, in
Parallel.__call__(self, iterable)
1076 try:
1077 # Only set self._iterating to True if at least a batch
1078 # was dispatched. In particular this covers the edge
(...)
1082 # was very quick and its callback already dispatched all
the
1083 # remaining jobs.
1084 self._iterating = False
-> 1085 if self.dispatch_one_batch(iterator):
1086 self._iterating = self._original_iterator is not None
1088 while self.dispatch_one_batch(iterator):

File ~\anaconda3\Lib\site-packages\joblib\parallel.py:901, in
Parallel.dispatch_one_batch(self, iterator)
899 return False
900 else:
--> 901 self._dispatch(tasks)
902 return True

File ~\anaconda3\Lib\site-packages\joblib\parallel.py:819, in
Parallel._dispatch(self, batch)
817 with self._lock:
818 job_idx = len(self._jobs)
--> 819 job = self._backend.apply_async(batch, callback=cb)
820 # A job can complete so quickly than its callback is
821 # called before we get here, causing self._jobs to
822 # grow. To ensure correct results ordering, .insert is
823 # used (rather than .append) in the following line
824 self._jobs.insert(job_idx, job)

File ~\anaconda3\Lib\site-packages\joblib\_parallel_backends.py:208,
in SequentialBackend.apply_async(self, func, callback)
206 def apply_async(self, func, callback=None):
207 """Schedule a func to be run"""
--> 208 result = ImmediateResult(func)
209 if callback:
210 callback(result)

File ~\anaconda3\Lib\site-packages\joblib\_parallel_backends.py:597,
in ImmediateResult.__init__(self, batch)
594 def __init__(self, batch):
595 # Don't delay the application, to avoid keeping the input
596 # arguments in memory
--> 597 self.results = batch()

File ~\anaconda3\Lib\site-packages\joblib\parallel.py:288, in
BatchedCalls.__call__(self)
284 def __call__(self):
285 # Set the default nested backend to self._backend but do
not set the
286 # change the default number of processes to -1
287 with parallel_backend(self._backend, n_jobs=self._n_jobs):
--> 288 return [func(*args, **kwargs)
289 for func, args, kwargs in self.items]

File ~\anaconda3\Lib\site-packages\joblib\parallel.py:288, in
<listcomp>(.0)
284 def __call__(self):
285 # Set the default nested backend to self._backend but do
not set the
286 # change the default number of processes to -1
287 with parallel_backend(self._backend, n_jobs=self._n_jobs):
--> 288 return [func(*args, **kwargs)
289 for func, args, kwargs in self.items]

File ~\anaconda3\Lib\site-packages\sklearn\utils\parallel.py:127, in
_FuncWrapper.__call__(self, *args, **kwargs)
125 config = {}
126 with config_context(**config):
--> 127 return self.function(*args, **kwargs)

File ~\anaconda3\Lib\site-packages\sklearn\model_selection\
_validation.py:1120, in _fit_and_predict(estimator, X, y, train, test,
verbose, fit_params, method)
1118 estimator.fit(X_train, y_train, **fit_params)
1119 func = getattr(estimator, method)
-> 1120 predictions = func(X_test)
1122 encode = (
1123 method in ["decision_function", "predict_proba",
"predict_log_proba"]
1124 and y is not None
1125 )
1127 if encode:

File ~\anaconda3\Lib\site-packages\sklearn\neighbors\
_classification.py:246, in KNeighborsClassifier.predict(self, X)
244 check_is_fitted(self, "_fit_method")
245 if self.weights == "uniform":
--> 246 if self._fit_method == "brute" and
ArgKminClassMode.is_usable_for(
247 X, self._fit_X, self.metric
248 ):
249 probabilities = self.predict_proba(X)
250 if self.outputs_2d_:

File ~\anaconda3\Lib\site-packages\sklearn\metrics\
_pairwise_distances_reduction\_dispatcher.py:471, in
ArgKminClassMode.is_usable_for(cls, X, Y, metric)
448 @classmethod
449 def is_usable_for(cls, X, Y, metric) -> bool:
450 """Return True if the dispatcher can be used for the given
parameters.
451
452 Parameters
(...)
468 True if the PairwiseDistancesReduction can be used, else
False.
469 """
470 return (
--> 471 ArgKmin.is_usable_for(X, Y, metric)
472 # TODO: Support CSR matrices.
473 and not issparse(X)
474 and not issparse(Y)
475 # TODO: implement Euclidean specialization with GEMM.
476 and metric not in ("euclidean", "sqeuclidean")
477 )

File ~\anaconda3\Lib\site-packages\sklearn\metrics\
_pairwise_distances_reduction\_dispatcher.py:115, in
BaseDistancesReductionDispatcher.is_usable_for(cls, X, Y, metric)
101 def is_valid_sparse_matrix(X):
102 return (
103 isspmatrix_csr(X)
104 and
(...)
110 X.indices.dtype == X.indptr.dtype == np.int32
111 )
113 is_usable = (
114 get_config().get("enable_cython_pairwise_dist", True)
--> 115 and (is_numpy_c_ordered(X) or is_valid_sparse_matrix(X))
116 and (is_numpy_c_ordered(Y) or is_valid_sparse_matrix(Y))
117 and X.dtype == Y.dtype
118 and X.dtype in (np.float32, np.float64)
119 and metric in cls.valid_metrics()
120 )
122 return is_usable

File ~\anaconda3\Lib\site-packages\sklearn\metrics\
_pairwise_distances_reduction\_dispatcher.py:99, in
BaseDistancesReductionDispatcher.is_usable_for.<locals>.is_numpy_c_ord
ered(X)
98 def is_numpy_c_ordered(X):
---> 99 return hasattr(X, "flags") and X.flags.c_contiguous

AttributeError: 'Flags' object has no attribute 'c_contiguous'


Multioutput Classification
noise = np.random.randint(0, 100, (len(x_train), 784))
x_train_mod = x_train + noise
noise = np.random.randint(0, 100, (len(x_test), 784))
x_test_mod = x_test + noise
y_train_mod = x_train
y_test_mod = x_test

some_index = 5
plt.subplot(121); plot_digit(x_test_mod.iloc[some_index].values)
plt.subplot(122); plot_digit(y_test_mod.iloc[some_index].values)
plt.show()

import numpy as np
import matplotlib.pyplot as plt

# Assuming plot_digit is defined somewhere


def plot_digit(digit):
# Implementation of your plotting function
plt.imshow(digit.reshape(28, 28), cmap='binary')
plt.axis('off')
plt.show()

# Assuming X_train, X_test are defined elsewhere


# Creating noisy data
noise = np.random.randint(0, 100, (len(x_train), 784))
x_train_mod = x_train + noise

noise = np.random.randint(0, 100, (len(x_test), 784))


x_test_mod = x_test + noise

# Assuming y_train_mod and y_test_mod are correctly defined


y_train_mod = x_train
y_test_mod = x_test
# Example index to visualize
some_index = 0

# Plotting the modified test data and corresponding original data


plt.subplot(121)
plot_digit(x_test_mod.iloc[some_index].values)
plt.title('Modified Test Data')

plt.subplot(122)
plot_digit(y_test_mod.iloc[some_index].values)
plt.title('Original Test Data')

plt.show()

C:\Users\sindu\AppData\Local\Temp\ipykernel_23300\1455393970.py:31:
MatplotlibDeprecationWarning: Auto-removal of overlapping axes is
deprecated since 3.6 and will be removed two minor releases later;
explicitly call ax.remove() as needed.
plt.subplot(122)

You might also like