Python

함수 이해하기 : 패키지, 모듈

송인장 2024. 1. 23. 16:01

In [1]:

#데이터 분석은 함수로 시작해 함수로 끝난다.
#데이터 분석은 함수를 이용해서 변수를 조작하는 일이다.
#데이터 분석 공부는 함수의 기능과 조작 방법을 익히는 과정이다.

In [2]:

# Create a variable
x = [1,2,3]
# A bare expression on the last line of a cell is shown as the cell's output
x

Out[2]:

[1, 2, 3]

In [3]:

# Apply a function
sum(x) # sum() computes the total of the values

Out[3]:

In [4]:

max(x) # maximum value

Out[4]:

In [5]:

min(x) # minimum value

Out[5]:

In [6]:

# Create a new variable from a function's result
# A function's result can be displayed directly, or it can be stored in a new variable.
x_sum = sum(x)
x_sum

Out[6]:

In [7]:

# Store the result of max(x) in x_max, then display it
x_max=max(x)
x_max

Out[7]:

In [8]:

#함수 꾸러미, '패키지' 이해하기
#패키지는 함수 상자가 여러 개 들어있는 주머니, 꾸러미에 비유할 수 있다.
#예를들어, 그래프를 만들 때 많이 사용하는 패키지인 seaborn에는 scatterplot(),barplot(),lineplot() 등 수십 가지 그래프 관련 함수가 들어있다.
#패키지 설치하기 > 패키지 로드하기 > 함수 사용하기
#아나콘다에는 주요 패키지가 대부분 들어있다.

In [9]:

import seaborn #패키지를 로드하려면 import 뒤에 패키지 이름을 입력하고 사용하면 된다.

In [11]:

# A list of string labels ('a' appears twice); used as the plotting input in the following cells
var = ['a','a','b','c']
var

Out[11]:

['a', 'a', 'b', 'c']

In [12]:

seaborn.countplot(x=var) # Build the x-axis from the values in var. A setting such as x= that configures a function's options is called a parameter.

Out[12]:

<Axes: ylabel='count'>

In [13]:

#패키지 약어 활용하기
#패키지를 로드하는 코드 뒤에 as와 약어를 입력해서 약어를 지정한다
import seaborn as sns

In [14]:

# Same call as seaborn.countplot(x=var), made through the sns alias
sns. countplot(x=var)

Out[14]:

<Axes: ylabel='count'>

In [16]:

# The seaborn package's load_dataset() loads a dataset that the package provides. The loaded data is assigned to df.
df= sns.load_dataset('titanic')
df

Out[16]:

	survived	pclass	sex	age	sibsp	parch	fare	embarked	class	who	adult_male	deck	embark_town	alive	alone
0	0	3	male	22.0	1	0	7.2500	S	Third	man	True	NaN	Southampton	no	False
1	1	1	female	38.0	1	0	71.2833	C	First	woman	False	C	Cherbourg	yes	False
2	1	3	female	26.0	0	0	7.9250	S	Third	woman	False	NaN	Southampton	yes	True
3	1	1	female	35.0	1	0	53.1000	S	First	woman	False	C	Southampton	yes	False
4	0	3	male	35.0	0	0	8.0500	S	Third	man	True	NaN	Southampton	no	True
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
886	0	2	male	27.0	0	0	13.0000	S	Second	man	True	NaN	Southampton	no	True
887	1	1	female	19.0	0	0	30.0000	S	First	woman	False	B	Southampton	yes	True
888	0	3	female	NaN	1	2	23.4500	S	Third	woman	False	NaN	Southampton	no	False
889	1	1	male	26.0	0	0	30.0000	C	First	man	True	C	Cherbourg	yes	True
890	0	3	male	32.0	0	0	7.7500	Q	Third	man	True	NaN	Queenstown	no	True

891 rows × 15 columns

In [17]:

# Each function has its own parameters, and adjusting them changes the result. Try different parameter settings to see how countplot() behaves.
sns.countplot(data=df, x='sex') # pass df as the data parameter and the 'sex' column as the x-axis parameter

Out[17]:

<Axes: xlabel='sex', ylabel='count'>

In [18]:

sns.countplot(data=df, x='class')# 'class' on the x-axis

Out[18]:

<Axes: xlabel='class', ylabel='count'>

In [20]:

sns. countplot(data=df, x='class', hue='alive') # 'class' on the x-axis, bars colored by 'alive'

Out[20]:

<Axes: xlabel='class', ylabel='count'>

In [22]:

sns.countplot(data=df, y='class', hue='alive') # 'class' on the y-axis, bars colored by 'alive'

Out[22]:

<Axes: xlabel='count', ylabel='class'>

In [24]:

#함수 사용법을 모르겠을 때는 함수명 앞 또는 뒤에 물음표를 넣어 도움말(Help)을 출력하자
sns.countplot?

Signature:
sns.countplot(
    data=None,
    *,
    x=None,
    y=None,
    hue=None,
    order=None,
    hue_order=None,
    orient=None,
    color=None,
    palette=None,
    saturation=0.75,
    width=0.8,
    dodge=True,
    ax=None,
    **kwargs,
)
Docstring:
Show the counts of observations in each categorical bin using bars.

A count plot can be thought of as a histogram across a categorical, instead
of quantitative, variable. The basic API and options are identical to those
for :func:`barplot`, so you can compare counts across nested variables.

Note that the newer :func:`histplot` function offers more functionality, although
its default behavior is somewhat different.

.. note::
    This function always treats one of the variables as categorical and
    draws data at ordinal positions (0, 1, ... n) on the relevant axis,
    even when the data has a numeric or date type.

See the :ref:`tutorial <categorical_tutorial>` for more information.    

Parameters
----------
data : DataFrame, array, or list of arrays, optional
    Dataset for plotting. If ``x`` and ``y`` are absent, this is
    interpreted as wide-form. Otherwise it is expected to be long-form.    
x, y, hue : names of variables in ``data`` or vector data, optional
    Inputs for plotting long-form data. See examples for interpretation.    
order, hue_order : lists of strings, optional
    Order to plot the categorical levels in; otherwise the levels are
    inferred from the data objects.    
orient : "v" | "h", optional
    Orientation of the plot (vertical or horizontal). This is usually
    inferred based on the type of the input variables, but it can be used
    to resolve ambiguity when both `x` and `y` are numeric or when
    plotting wide-form data.    
color : matplotlib color, optional
    Single color for the elements in the plot.    
palette : palette name, list, or dict
    Colors to use for the different levels of the ``hue`` variable. Should
    be something that can be interpreted by :func:`color_palette`, or a
    dictionary mapping hue levels to matplotlib colors.    
saturation : float, optional
    Proportion of the original saturation to draw colors at. Large patches
    often look better with slightly desaturated colors, but set this to
    `1` if you want the plot colors to perfectly match the input color.    
dodge : bool, optional
    When hue nesting is used, whether elements should be shifted along the
    categorical axis.    
ax : matplotlib Axes, optional
    Axes object to draw the plot onto, otherwise uses the current Axes.    
kwargs : key, value mappings
    Other keyword arguments are passed through to
    :meth:`matplotlib.axes.Axes.bar`.

Returns
-------
ax : matplotlib Axes
    Returns the Axes object with the plot drawn onto it.    

See Also
--------
barplot : Show point estimates and confidence intervals using bars.    
catplot : Combine a categorical plot with a :class:`FacetGrid`.    

Examples
--------

.. include:: ../docstrings/countplot.rst
File:      c:\users\user\anaconda3\lib\site-packages\seaborn\categorical.py
Type:      function

In [25]:

#모듈 알아보기 : 패키지라는 큰 꾸러미에 비슷한 함수들을 넣어 둔 작은 꾸러미
#머신러닝 모델을 만들 때 사용하는 sklearn 패키지에는 metrics, tree, model_selection 등 여러 모듈이 들어있다.
#metrics 모듈에는 머신러닝 모델의 예측이 얼마나 정확한지 성능을 평가할 때 사용하는 accuracy_score()함수가 있다. 이 함수를 사용하려면 먼저 모듈을 로드해야 함.
#패키지의 모듈을 불러오려면 import sklearn.metrics 처럼 패키지를 로드하는 코드뒤에 점을 찍고 모듈 이름을 입력하면 된다.

In [26]:

import sklearn. metrics #sklearn 패키지의 metrics 모듈 로드하기

In [27]:

sklearn.metrics.accuracy_score() # No arguments are passed, so this raises an error. The point here is only to show how a module's function is reached.
# Calls the accuracy_score function in the metrics module of the sklearn package

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
Cell In[27], line 1
----> 1 sklearn.metrics.accuracy_score()

File ~\anaconda3\Lib\site-packages\sklearn\utils\_param_validation.py:189, in validate_params.<locals>.decorator.<locals>.wrapper(*args, **kwargs)
    186 func_sig = signature(func)
    188 # Map *args/**kwargs to the function signature
--> 189 params = func_sig.bind(*args, **kwargs)
    190 params.apply_defaults()
    192 # ignore self/cls and positional/keyword markers

File ~\anaconda3\Lib\inspect.py:3212, in Signature.bind(self, *args, **kwargs)
   3207 def bind(self, /, *args, **kwargs):
   3208     """Get a BoundArguments object, that maps the passed `args`
   3209     and `kwargs` to the function's signature.  Raises `TypeError`
   3210     if the passed arguments can not be bound.
   3211     """
-> 3212     return self._bind(args, kwargs)

File ~\anaconda3\Lib\inspect.py:3127, in Signature._bind(self, args, kwargs, partial)
   3125                 msg = 'missing a required argument: {arg!r}'
   3126                 msg = msg.format(arg=param.name)
-> 3127                 raise TypeError(msg) from None
   3128 else:
   3129     # We have a positional argument to process
   3130     try:

TypeError: missing a required argument: 'y_true'

In [28]:

# NOTE: every accuracy_score call in this cell is made without arguments, so the
# first call raises TypeError and the statements after it do not run.

# Use a function as module_name.function_name()
from sklearn import metrics # load the module
metrics.accuracy_score()

# Use a function as function_name()
from sklearn. metrics import accuracy_score # load one specific function
accuracy_score()

# Assign an alias with as: anything loaded with import can be given an alias with as.
import sklearn. metrics as met
met.accuracy_score()

from sklearn import metrics as met
met.accuracy_score()

from sklearn. metrics import accuracy_score as accuracy # load one specific function under an alias
accuracy()

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
Cell In[28], line 3
      1 #모듈명.함수명()으로 함수 사용하기
      2 from sklearn import metrics #모듈 로드하기
----> 3 metrics.accuracy_score()
      5 #함수명()으로 함수 사용하기
      6 from sklearn. metrics import accuracy_score #함수를 지정해 로드하기

File ~\anaconda3\Lib\site-packages\sklearn\utils\_param_validation.py:189, in validate_params.<locals>.decorator.<locals>.wrapper(*args, **kwargs)
    186 func_sig = signature(func)
    188 # Map *args/**kwargs to the function signature
--> 189 params = func_sig.bind(*args, **kwargs)
    190 params.apply_defaults()
    192 # ignore self/cls and positional/keyword markers

File ~\anaconda3\Lib\inspect.py:3212, in Signature.bind(self, *args, **kwargs)
   3207 def bind(self, /, *args, **kwargs):
   3208     """Get a BoundArguments object, that maps the passed `args`
   3209     and `kwargs` to the function's signature.  Raises `TypeError`
   3210     if the passed arguments can not be bound.
   3211     """
-> 3212     return self._bind(args, kwargs)

File ~\anaconda3\Lib\inspect.py:3127, in Signature._bind(self, args, kwargs, partial)
   3125                 msg = 'missing a required argument: {arg!r}'
   3126                 msg = msg.format(arg=param.name)
-> 3127                 raise TypeError(msg) from None
   3128 else:
   3129     # We have a positional argument to process
   3130     try:

TypeError: missing a required argument: 'y_true'

In [29]:

#pydataset 패키지 설치하기 : 아나콘다 프롬프트를 열어 'pip install 패키지명'을 실행한다. pip는 파이썬 패키지를 설치하거나 관리할 때 사용하는 명령어이다.
import pydataset

initiated datasets repo at: C:\Users\user\.pydataset/

In [30]:

# Called with no arguments, data() displays a table of dataset_id and title entries
pydataset.data()

Out[30]:

	dataset_id	title
0	AirPassengers	Monthly Airline Passenger Numbers 1949-1960
1	BJsales	Sales Data with Leading Indicator
2	BOD	Biochemical Oxygen Demand
3	Formaldehyde	Determination of Formaldehyde
4	HairEyeColor	Hair and Eye Color of Statistics Students
...	...	...
752	VerbAgg	Verbal Aggression item responses
753	cake	Breakage Angle of Chocolate Cakes
754	cbpp	Contagious bovine pleuropneumonia
755	grouseticks	Data on red grouse ticks from Elston et al. 2001
756	sleepstudy	Reaction times in a sleep deprivation study

757 rows × 2 columns

In [31]:

# NOTE(review): df was assigned the titanic dataset earlier in this notebook; this line rebinds the name.
df=pydataset.data('mtcars') # assign the mtcars data to df
df

Out[31]:

	mpg	cyl	disp	hp	drat	wt	qsec	vs	am	gear	carb
Mazda RX4	21.0	6	160.0	110	3.90	2.620	16.46	0	1	4	4
Mazda RX4 Wag	21.0	6	160.0	110	3.90	2.875	17.02	0	1	4	4
Datsun 710	22.8	4	108.0	93	3.85	2.320	18.61	1	1	4	1
Hornet 4 Drive	21.4	6	258.0	110	3.08	3.215	19.44	1	0	3	1
Hornet Sportabout	18.7	8	360.0	175	3.15	3.440	17.02	0	0	3	2
Valiant	18.1	6	225.0	105	2.76	3.460	20.22	1	0	3	1
Duster 360	14.3	8	360.0	245	3.21	3.570	15.84	0	0	3	4
Merc 240D	24.4	4	146.7	62	3.69	3.190	20.00	1	0	4	2
Merc 230	22.8	4	140.8	95	3.92	3.150	22.90	1	0	4	2
Merc 280	19.2	6	167.6	123	3.92	3.440	18.30	1	0	4	4
Merc 280C	17.8	6	167.6	123	3.92	3.440	18.90	1	0	4	4
Merc 450SE	16.4	8	275.8	180	3.07	4.070	17.40	0	0	3	3
Merc 450SL	17.3	8	275.8	180	3.07	3.730	17.60	0	0	3	3
Merc 450SLC	15.2	8	275.8	180	3.07	3.780	18.00	0	0	3	3
Cadillac Fleetwood	10.4	8	472.0	205	2.93	5.250	17.98	0	0	3	4
Lincoln Continental	10.4	8	460.0	215	3.00	5.424	17.82	0	0	3	4
Chrysler Imperial	14.7	8	440.0	230	3.23	5.345	17.42	0	0	3	4
Fiat 128	32.4	4	78.7	66	4.08	2.200	19.47	1	1	4	1
Honda Civic	30.4	4	75.7	52	4.93	1.615	18.52	1	1	4	2
Toyota Corolla	33.9	4	71.1	65	4.22	1.835	19.90	1	1	4	1
Toyota Corona	21.5	4	120.1	97	3.70	2.465	20.01	1	0	3	1
Dodge Challenger	15.5	8	318.0	150	2.76	3.520	16.87	0	0	3	2
AMC Javelin	15.2	8	304.0	150	3.15	3.435	17.30	0	0	3	2
Camaro Z28	13.3	8	350.0	245	3.73	3.840	15.41	0	0	3	4
Pontiac Firebird	19.2	8	400.0	175	3.08	3.845	17.05	0	0	3	2
Fiat X1-9	27.3	4	79.0	66	4.08	1.935	18.90	1	1	4	1
Porsche 914-2	26.0	4	120.3	91	4.43	2.140	16.70	0	1	5	2
Lotus Europa	30.4	4	95.1	113	3.77	1.513	16.90	1	1	5	2
Ford Pantera L	15.8	8	351.0	264	4.22	3.170	14.50	0	1	5	4
Ferrari Dino	19.7	6	145.0	175	3.62	2.770	15.50	0	1	5	6
Maserati Bora	15.0	8	301.0	335	3.54	3.570	14.60	0	1	5	8
Volvo 142E	21.4	4	121.0	109	4.11	2.780	18.60	1	1	4	2

In [ ]:

#이처럼 아나콘다에 들어있지 않은 패키지의 함수를 사용하려면 먼저 패키지를 설치하고 로드해야 한다.

'Python' 카테고리의 다른 글

외부데이터 이용하기 (1)	2024.01.25
데이터 프레임 이해하기 (1)	2024.01.24
변수 이해하기 (0)	2024.01.23

현재글: 함수 이해하기 : 패키지, 모듈

learn-record

데이터분석 공부를 기록합니다.

SK아싹후기, 데이터분석공부, SKT아카데미빅데이터분석가후기, ASAC빅데이터분석가, 아싹후기, ASAC빅데이터분석가후기, SKT아카데미, 파이썬엑셀불러오기, 아싹빅데이터분석가과정, 파이썬공부, 클라우드서비스개발과정, 클라우드엔지니어링, 데이터프레임, 파이썬, T아카데미 후기, ASAC후기, 데이터분석첫걸음, 외부데이터불러오기, AI엔지니어링, SK플래닛빅데이터분석후기,

Today :
Yesterday :

learn-record

함수 이해하기 : 패키지, 모듈

'Python' 카테고리의 다른 글

'Python'의 다른글

티스토리툴바

« 2025/04 »
일	월	화	수	목	금	토
		1	2	3	4	5
6	7	8	9	10	11	12
13	14	15	16	17	18	19
20	21	22	23	24	25	26
27	28	29	30

함수 이해하기 : 패키지, 모듈

'Python' 카테고리의 다른 글

'Python'의 다른글

관련글

티스토리툴바